[llvm] [CVP][LVI] Add support for vectors (PR #97428)

Tue Jul 2 08:34:52 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

<details>
<summary>Changes</summary>

The core change here is to add support for converting vector constants into constant ranges. The rest is just relaxing isIntegerTy() checks and making sure we don't use APIs that assume scalar integer types (such as getIntegerBitWidth()).

There are a couple of places that don't support vectors yet, most notably the "simplest" fold (comparisons to a constant). I'll leave these to a followup.

---

Patch is 20.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97428.diff


4 Files Affected:

- (modified) llvm/lib/Analysis/LazyValueInfo.cpp (+24-4) 
- (modified) llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp (+10-43) 
- (modified) llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll (+1-3) 
- (modified) llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll (+32-13) 


``````````diff

diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index d8b03eaa3928f..da7827c5e3bfa 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -650,7 +650,7 @@ LazyValueInfoImpl::solveBlockValueImpl(Value *Val, BasicBlock *BB) {
   if (PT && isKnownNonZero(BBI, DL))
     return ValueLatticeElement::getNot(ConstantPointerNull::get(PT));
 
-  if (BBI->getType()->isIntegerTy()) {
+  if (BBI->getType()->isIntOrIntVectorTy()) {
     if (auto *CI = dyn_cast<CastInst>(BBI))
       return solveBlockValueCast(CI, BB);
 
@@ -836,6 +836,24 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
   }
 }
 
+static ConstantRange getConstantRangeFromVector(Constant *C,
+                                                FixedVectorType *Ty) {
+  unsigned BW = Ty->getScalarSizeInBits();
+  ConstantRange CR = ConstantRange::getEmpty(BW);
+  for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
+    Constant *Elem = C->getAggregateElement(I);
+    if (!Elem)
+      return ConstantRange::getFull(BW);
+    if (isa<PoisonValue>(Elem))
+      continue;
+    auto *CI = dyn_cast<ConstantInt>(Elem);
+    if (!CI)
+      return ConstantRange::getFull(BW);
+    CR = CR.unionWith(CI->getValue());
+  }
+  return CR;
+}
+
 static ConstantRange toConstantRange(const ValueLatticeElement &Val,
                                      Type *Ty, bool UndefAllowed = false) {
   assert(Ty->isIntOrIntVectorTy() && "Must be integer type");
@@ -844,6 +862,9 @@ static ConstantRange toConstantRange(const ValueLatticeElement &Val,
   unsigned BW = Ty->getScalarSizeInBits();
   if (Val.isUnknown())
     return ConstantRange::getEmpty(BW);
+  if (Val.isConstant())
+    if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
+      return getConstantRangeFromVector(Val.getConstant(), VTy);
   return ConstantRange::getFull(BW);
 }
 
@@ -968,7 +989,7 @@ LazyValueInfoImpl::solveBlockValueCast(CastInst *CI, BasicBlock *BB) {
     return std::nullopt;
   const ConstantRange &LHSRange = *LHSRes;
 
-  const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
+  const unsigned ResultBitWidth = CI->getType()->getScalarSizeInBits();
 
   // NOTE: We're currently limited by the set of operations that ConstantRange
   // can evaluate symbolically.  Enhancing that set will allows us to analyze
@@ -1108,7 +1129,7 @@ LazyValueInfoImpl::getValueFromSimpleICmpCondition(CmpInst::Predicate Pred,
                                                    const APInt &Offset,
                                                    Instruction *CxtI,
                                                    bool UseBlockValue) {
-  ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(),
+  ConstantRange RHSRange(RHS->getType()->getScalarSizeInBits(),
                          /*isFullSet=*/true);
   if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
     RHSRange = ConstantRange(CI->getValue());
@@ -1728,7 +1749,6 @@ Constant *LazyValueInfo::getConstant(Value *V, Instruction *CxtI) {
 
 ConstantRange LazyValueInfo::getConstantRange(Value *V, Instruction *CxtI,
                                               bool UndefAllowed) {
-  assert(V->getType()->isIntegerTy());
   BasicBlock *BB = CxtI->getParent();
   ValueLatticeElement Result =
       getOrCreateImpl(BB->getModule()).getValueInBlock(V, BB, CxtI);
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 875d3ea78fae5..34304c2245e30 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -288,9 +288,8 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
 }
 
 static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
-  // Only for signed relational comparisons of scalar integers.
-  if (Cmp->getType()->isVectorTy() ||
-      !Cmp->getOperand(0)->getType()->isIntegerTy())
+  // Only for signed relational comparisons of integers.
+  if (!Cmp->getOperand(0)->getType()->isIntOrIntVectorTy())
     return false;
 
   if (!Cmp->isSigned())
@@ -505,12 +504,8 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI);
 // because it is negation-invariant.
 static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
   Value *X = II->getArgOperand(0);
-  Type *Ty = X->getType();
-  if (!Ty->isIntegerTy())
-    return false;
-
   bool IsIntMinPoison = cast<ConstantInt>(II->getArgOperand(1))->isOne();
-  APInt IntMin = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
+  APInt IntMin = APInt::getSignedMinValue(X->getType()->getScalarSizeInBits());
   ConstantRange Range = LVI->getConstantRangeAtUse(
       II->getOperandUse(0), /*UndefAllowed*/ IsIntMinPoison);
 
@@ -679,15 +674,13 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
   }
 
   if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) {
-    if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
+    if (willNotOverflow(WO, LVI))
       return processOverflowIntrinsic(WO, LVI);
-    }
   }
 
   if (auto *SI = dyn_cast<SaturatingInst>(&CB)) {
-    if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) {
+    if (willNotOverflow(SI, LVI))
       return processSaturatingInst(SI, LVI);
-    }
   }
 
   bool Changed = false;
@@ -761,11 +754,10 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR,
                              const ConstantRange &RCR) {
   assert(Instr->getOpcode() == Instruction::SDiv ||
          Instr->getOpcode() == Instruction::SRem);
-  assert(!Instr->getType()->isVectorTy());
 
   // Find the smallest power of two bitwidth that's sufficient to hold Instr's
   // operands.
-  unsigned OrigWidth = Instr->getType()->getIntegerBitWidth();
+  unsigned OrigWidth = Instr->getType()->getScalarSizeInBits();
 
   // What is the smallest bit width that can accommodate the entire value ranges
   // of both of the operands?
@@ -788,7 +780,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR,
 
   ++NumSDivSRemsNarrowed;
   IRBuilder<> B{Instr};
-  auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
+  auto *TruncTy = Instr->getType()->getWithNewBitWidth(NewWidth);
   auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
                                      Instr->getName() + ".lhs.trunc");
   auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
@@ -809,7 +801,6 @@ static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
   Type *Ty = Instr->getType();
   assert(Instr->getOpcode() == Instruction::UDiv ||
          Instr->getOpcode() == Instruction::URem);
-  assert(!Ty->isVectorTy());
   bool IsRem = Instr->getOpcode() == Instruction::URem;
 
   Value *X = Instr->getOperand(0);
@@ -892,7 +883,6 @@ static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
                              const ConstantRange &YCR) {
   assert(Instr->getOpcode() == Instruction::UDiv ||
          Instr->getOpcode() == Instruction::URem);
-  assert(!Instr->getType()->isVectorTy());
 
   // Find the smallest power of two bitwidth that's sufficient to hold Instr's
   // operands.
@@ -905,12 +895,12 @@ static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
 
   // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
   // two.
-  if (NewWidth >= Instr->getType()->getIntegerBitWidth())
+  if (NewWidth >= Instr->getType()->getScalarSizeInBits())
     return false;
 
   ++NumUDivURemsNarrowed;
   IRBuilder<> B{Instr};
-  auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
+  auto *TruncTy = Instr->getType()->getWithNewBitWidth(NewWidth);
   auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
                                      Instr->getName() + ".lhs.trunc");
   auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
@@ -929,9 +919,6 @@ static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
 static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
   assert(Instr->getOpcode() == Instruction::UDiv ||
          Instr->getOpcode() == Instruction::URem);
-  if (Instr->getType()->isVectorTy())
-    return false;
-
   ConstantRange XCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0),
                                                  /*UndefAllowed*/ false);
   // Allow undef for RHS, as we can assume it is division by zero UB.
@@ -946,7 +933,6 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
 static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR,
                         const ConstantRange &RCR, LazyValueInfo *LVI) {
   assert(SDI->getOpcode() == Instruction::SRem);
-  assert(!SDI->getType()->isVectorTy());
 
   if (LCR.abs().icmp(CmpInst::ICMP_ULT, RCR.abs())) {
     SDI->replaceAllUsesWith(SDI->getOperand(0));
@@ -1006,7 +992,6 @@ static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR,
 static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
                         const ConstantRange &RCR, LazyValueInfo *LVI) {
   assert(SDI->getOpcode() == Instruction::SDiv);
-  assert(!SDI->getType()->isVectorTy());
 
   // Check whether the division folds to a constant.
   ConstantRange DivCR = LCR.sdiv(RCR);
@@ -1064,9 +1049,6 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
 static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
   assert(Instr->getOpcode() == Instruction::SDiv ||
          Instr->getOpcode() == Instruction::SRem);
-  if (Instr->getType()->isVectorTy())
-    return false;
-
   ConstantRange LCR =
       LVI->getConstantRangeAtUse(Instr->getOperandUse(0), /*AllowUndef*/ false);
   // Allow undef for RHS, as we can assume it is division by zero UB.
@@ -1085,12 +1067,9 @@ static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
 }
 
 static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
-  if (SDI->getType()->isVectorTy())
-    return false;
-
   ConstantRange LRange =
       LVI->getConstantRangeAtUse(SDI->getOperandUse(0), /*UndefAllowed*/ false);
-  unsigned OrigWidth = SDI->getType()->getIntegerBitWidth();
+  unsigned OrigWidth = SDI->getType()->getScalarSizeInBits();
   ConstantRange NegOneOrZero =
       ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1));
   if (NegOneOrZero.contains(LRange)) {
@@ -1117,9 +1096,6 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
 }
 
 static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
-  if (SDI->getType()->isVectorTy())
-    return false;
-
   const Use &Base = SDI->getOperandUse(0);
   if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
            .isAllNonNegative())
@@ -1138,9 +1114,6 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
 }
 
 static bool processPossibleNonNeg(PossiblyNonNegInst *I, LazyValueInfo *LVI) {
-  if (I->getType()->isVectorTy())
-    return false;
-
   if (I->hasNonNeg())
     return false;
 
@@ -1164,9 +1137,6 @@ static bool processUIToFP(UIToFPInst *UIToFP, LazyValueInfo *LVI) {
 }
 
 static bool processSIToFP(SIToFPInst *SIToFP, LazyValueInfo *LVI) {
-  if (SIToFP->getType()->isVectorTy())
-    return false;
-
   const Use &Base = SIToFP->getOperandUse(0);
   if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
            .isAllNonNegative())
@@ -1187,9 +1157,6 @@ static bool processSIToFP(SIToFPInst *SIToFP, LazyValueInfo *LVI) {
 static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
   using OBO = OverflowingBinaryOperator;
 
-  if (BinOp->getType()->isVectorTy())
-    return false;
-
   bool NSW = BinOp->hasNoSignedWrap();
   bool NUW = BinOp->hasNoUnsignedWrap();
   if (NSW && NUW)
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
index ca70713440219..200793918f0ef 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
@@ -1246,13 +1246,11 @@ define i1 @non_const_range_minmax(i8 %a, i8 %b) {
   ret i1 %cmp1
 }
 
-; FIXME: Also support vectors.
 define <2 x i1> @non_const_range_minmax_vec(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @non_const_range_minmax_vec(
 ; CHECK-NEXT:    [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 10, i8 10>)
 ; CHECK-NEXT:    [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> <i8 11, i8 11>)
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i8> [[A2]], [[B2]]
-; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
+; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
 ;
   %a2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
   %b2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %b, <2 x i8> <i8 11, i8 11>)
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
index 9862dd56e31b2..bdb3a4d92947d 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -passes=correlated-propagation < %s | FileCheck %s
 
+; TODO: Add support for this.
 define <2 x i1> @cmp1(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i1> @cmp1(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
@@ -13,6 +14,7 @@ define <2 x i1> @cmp1(<2 x i8> %a) {
   ret <2 x i1> %cmp
 }
 
+; TODO: Add support for this.
 define <2 x i1> @cmp2(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i1> @cmp2(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
@@ -29,7 +31,7 @@ define <2 x i1> @cmp_signedness(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i1> @cmp_signedness(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i16> [[ZEXT]], <i16 5, i16 5>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i16> [[ZEXT]], <i16 5, i16 5>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -41,7 +43,7 @@ define <2 x i16> @infer_nowrap(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i16> @infer_nowrap(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = add <2 x i16> [[ZEXT]], <i16 1, i16 1>
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], <i16 1, i16 1>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -53,7 +55,7 @@ define <2 x i16> @infer_nowrap_nonsplat(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i16> @infer_nowrap_nonsplat(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = add <2 x i16> [[ZEXT]], <i16 1, i16 2>
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], <i16 1, i16 2>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -61,11 +63,23 @@ define <2 x i16> @infer_nowrap_nonsplat(<2 x i8> %a) {
   ret <2 x i16> %res
 }
 
+define <2 x i16> @infer_nowrap_poison(<2 x i8> %a) {
+; CHECK-LABEL: define <2 x i16> @infer_nowrap_poison(
+; CHECK-SAME: <2 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], <i16 1, i16 poison>
+; CHECK-NEXT:    ret <2 x i16> [[RES]]
+;
+  %zext = zext <2 x i8> %a to <2 x i16>
+  %res = add <2 x i16> %zext, <i16 1, i16 poison>
+  ret <2 x i16> %res
+}
+
 define <2 x i16> @infer_nowrap_nonsplat_nsw_only(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i16> @infer_nowrap_nonsplat_nsw_only(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = add <2 x i16> [[ZEXT]], <i16 1, i16 -1>
+; CHECK-NEXT:    [[RES:%.*]] = add nsw <2 x i16> [[ZEXT]], <i16 1, i16 -1>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -77,8 +91,7 @@ define <2 x i16> @abs(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i16> @abs(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[ZEXT]], i1 false)
-; CHECK-NEXT:    ret <2 x i16> [[RES]]
+; CHECK-NEXT:    ret <2 x i16> [[ZEXT]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
   %res = call <2 x i16> @llvm.abs(<2 x i16> %zext, i1 false)
@@ -89,7 +102,7 @@ define <2 x i16> @saturating(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i16> @saturating(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ZEXT]], <2 x i16> <i16 1, i16 1>)
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], <i16 1, i16 1>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -101,7 +114,8 @@ define {<2 x i16>, <2 x i1>} @with_overflow(<2 x i8> %a) {
 ; CHECK-LABEL: define { <2 x i16>, <2 x i1> } @with_overflow(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = call { <2 x i16>, <2 x i1> } @llvm.uadd.with.overflow.v2i16(<2 x i16> [[ZEXT]], <2 x i16> <i16 1, i16 1>)
+; CHECK-NEXT:    [[RES1:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], <i16 1, i16 1>
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { <2 x i16>, <2 x i1> } { <2 x i16> poison, <2 x i1> zeroinitializer }, <2 x i16> [[RES1]], 0
 ; CHECK-NEXT:    ret { <2 x i16>, <2 x i1> } [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -113,7 +127,9 @@ define <2 x i16> @srem1(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i16> @srem1(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = srem <2 x i16> [[ZEXT]], <i16 42, i16 42>
+; CHECK-NEXT:    [[RES1_LHS_TRUNC:%.*]] = trunc <2 x i16> [[ZEXT]] to <2 x i8>
+; CHECK-NEXT:    [[RES12:%.*]] = urem <2 x i8> [[RES1_LHS_TRUNC]], <i8 42, i8 42>
+; CHECK-NEXT:    [[RES:%.*]] = zext <2 x i8> [[RES12]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -125,7 +141,9 @@ define <2 x i16> @srem2(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i16> @srem2(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = sext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = srem <2 x i16> [[ZEXT]], <i16 42, i16 42>
+; CHECK-NEXT:    [[RES_LHS_TRUNC:%.*]] = trunc <2 x i16> [[ZEXT]] to <2 x i8>
+; CHECK-NEXT:    [[RES1:%.*]] = srem <2 x i8> [[RES_LHS_TRUNC]], <i8 42, i8 42>
+; CHECK-NEXT:    [[RES:%.*]] = sext <2 x i8> [[RES1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = sext <2 x i8> %a to <2 x i16>
@@ -137,7 +155,7 @@ define <2 x i16> @ashr(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i16> @ashr(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = ashr <2 x i16> [[ZEXT]], <i16 1, i16 1>
+; CHECK-NEXT:    [[RES:%.*]] = lshr <2 x i16> [[ZEXT]], <i16 1, i16 1>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -149,7 +167,7 @@ define <2 x i32> @sext(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i32> @sext(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = sext <2 x i16> [[ZEXT]] to <2 x i32>
+; CHECK-NEXT:    [[RES:%.*]] = zext nneg <2 x i16> [[ZEXT]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -161,7 +179,7 @@ define <2 x float> @sitofp(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x float> @sitofp(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = sitofp <2 x i16> [[ZEXT]] to <2 x float>
+; CHECK-NEXT:    [[RES:%.*]] = uitofp nneg <2 x i16> [[ZEXT]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -169,6 +187,7 @@ define <2 x float> @sitofp(<2 x i8> %a) {
   ret <2 x float> %res
 }
 
...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/97428