[llvm] [LV] Add extra check for signed overflow for SDiv/SRem (PR #170818)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 01:31:03 PST 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/170818
From 3d6c7f05c3c8cd1a2339b4aab04403e9ba7c50e5 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 5 Dec 2025 00:53:15 -0800
Subject: [PATCH] [LV] Add extra check for signed overflow for SDiv/SRem
---
.../Transforms/Vectorize/LoopVectorize.cpp | 32 +++++++++++++++++--
.../LoopVectorize/AArch64/invalid-costs.ll | 3 +-
.../Transforms/LoopVectorize/RISCV/divrem.ll | 6 ++--
.../LoopVectorize/RISCV/tail-folding-div.ll | 6 ++--
4 files changed, 40 insertions(+), 7 deletions(-)
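For context before the diff: LLVM IR's sdiv and srem, like signed / and % in C++, are undefined not only for a zero divisor but also for the one overflowing operand pair, INT_MIN / -1, and that second case is what this patch keeps masked-off lanes away from. A minimal standalone sketch of the hazard (editor's illustration, not part of the patch; sdivIsUB is a hypothetical helper):

    #include <cassert>
    #include <climits>

    // Hypothetical predicate: the complete set of operand pairs for which a
    // signed division or remainder is undefined behavior.
    bool sdivIsUB(int lhs, int rhs) {
      return rhs == 0 ||                    // division by zero
             (lhs == INT_MIN && rhs == -1); // INT_MAX + 1 is unrepresentable
    }

    int main() {
      assert(sdivIsUB(42, 0));
      assert(sdivIsUB(INT_MIN, -1)); // the case the patch newly guards
      assert(!sdivIsUB(INT_MIN, 1));
      assert(!sdivIsUB(-1, -1));
      return 0;
    }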
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f78acf31a0de2..c4844496ebea7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2856,6 +2856,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// having at least one active lane (the first). If the side-effects of the
// instruction are invariant, executing it w/o (the tail-folding) mask is safe
// - it will cause the same side-effects as when masked.
+  // SDiv/SRem can also overflow (INT_MIN / -1), which is only possible when
+  // the dividend varies and the divisor varies too or is the constant -1.
+  auto MayOverflow = [this](Instruction *I) {
+    Value *LHS = I->getOperand(0);
+    Value *RHS = I->getOperand(1);
+    return !Legal->isInvariant(LHS) &&
+           (!Legal->isInvariant(RHS) || match(RHS, m_AllOnes()));
+  };
switch(I->getOpcode()) {
default:
llvm_unreachable(
@@ -2878,11 +2886,13 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
case Instruction::URem:
// If the divisor is loop-invariant no predication is needed.
return !Legal->isInvariant(I->getOperand(1));
+ case Instruction::SDiv:
+ case Instruction::SRem:
+    // Signed ops can additionally overflow (INT_MIN / -1); check the operands.
+ return !Legal->isInvariant(I->getOperand(1)) || MayOverflow(I);
}
}
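Restating the new predicate outside LLVM: the extra predication is only needed when the dividend can vary while the divisor either varies as well or is literally the constant -1. An invariant dividend adds no new hazard here, since a variant divisor already forces predication on its own, and a fully invariant division is one the scalar loop executes anyway. A small mirror of the logic (editor's sketch; the flag names are hypothetical stand-ins for Legal->isInvariant() and the ConstantInt -1 test):

    #include <cassert>

    // Hypothetical mirror of the MayOverflow lambda above.
    bool mayOverflow(bool LHSInvariant, bool RHSInvariant, bool RHSIsMinusOne) {
      return !LHSInvariant && (!RHSInvariant || RHSIsMinusOne);
    }

    int main() {
      assert(!mayOverflow(true, false, false)); // invariant dividend: safe
      assert(mayOverflow(false, false, false)); // both vary: INT_MIN / -1 possible
      assert(mayOverflow(false, true, true));   // divisor is the constant -1
      assert(!mayOverflow(false, true, false)); // invariant divisor other than -1
      return 0;
    }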
@@ -7866,12 +7876,30 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
// If not provably safe, use a select to form a safe divisor before widening the
// div/rem operation itself. Otherwise fall through to general handling below.
if (CM.isPredicatedInst(I)) {
+    // Mirrors the signed-overflow check in isPredicatedInst: a variant
+    // dividend whose divisor varies too or is the constant -1.
+    auto MayOverflow = [&](Instruction *I) {
+      Value *LHS = I->getOperand(0);
+      Value *RHS = I->getOperand(1);
+      return !Legal->isInvariant(LHS) &&
+             (!Legal->isInvariant(RHS) || match(RHS, m_AllOnes()));
+    };
SmallVector<VPValue *> Ops(VPI->operands());
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
VPValue *One = Plan.getConstantInt(I->getType(), 1u);
auto *SafeRHS =
Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc());
Ops[1] = SafeRHS;
+    // For signed ops, also guard against INT_MIN / -1 overflow by selecting
+    // a zero dividend on masked-off lanes (0 / 1 is harmless).
+    if ((VPI->getOpcode() == Instruction::SDiv ||
+         VPI->getOpcode() == Instruction::SRem) &&
+        MayOverflow(I)) {
+      VPValue *Zero = Plan.getConstantInt(I->getType(), 0);
+      auto *SafeLHS =
+          Builder.createSelect(Mask, Ops[0], Zero, VPI->getDebugLoc());
+      Ops[0] = SafeLHS;
+    }
return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc());
}
[[fallthrough]];
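Modeled on a single scalar lane, the recipe change amounts to the following (editor's sketch; maskedSDivLane is hypothetical, and the real code adds the dividend select only when MayOverflow holds, a detail omitted here):

    #include <cassert>
    #include <climits>

    // Hypothetical model of one lane of the widened, masked sdiv: an inactive
    // lane computes 0 / 1 == 0, so neither division by zero nor INT_MIN / -1
    // can occur, and the blend discards its result anyway.
    int maskedSDivLane(bool laneActive, int lhs, int rhs) {
      int safeRhs = laneActive ? rhs : 1; // the existing safe-divisor select
      int safeLhs = laneActive ? lhs : 0; // the new safe-dividend select
      return safeLhs / safeRhs;
    }

    int main() {
      assert(maskedSDivLane(false, INT_MIN, -1) == 0); // masked off: no overflow
      assert(maskedSDivLane(true, -12, 3) == -4);      // active lanes unchanged
      return 0;
    }

In the EVL-based RISC-V tests below, the same dividend select shows up as a llvm.vp.merge with a zeroinitializer fallback rather than a plain select.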
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
index cc3b1c9c9db8a..2d2fdbaa3170b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
@@ -32,7 +32,8 @@ define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noal
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP10:%.*]] = sext <vscale x 4 x i32> [[TMP9]] to <vscale x 4 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i64> [[TMP7]], <vscale x 4 x i64> splat (i64 1)
-; CHECK-NEXT: [[TMP12:%.*]] = sdiv <vscale x 4 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i64> [[TMP10]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sdiv <vscale x 4 x i64> [[TMP16]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = trunc <vscale x 4 x i64> [[TMP12]] to <vscale x 4 x i32>
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index e4ba6fe9d757d..de235a85349d7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -575,7 +575,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 -128)
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> splat (i8 -1), <vscale x 16 x i8> splat (i8 1), i32 [[TMP12]])
-; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], [[TMP10]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[WIDE_LOAD]], <vscale x 16 x i8> zeroinitializer, i32 [[TMP12]])
+; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[TMP4]], [[TMP10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[WIDE_LOAD]]
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[PREDPHI]], ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
@@ -599,7 +600,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128)
; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1)
-; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]]
+; FIXED-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[WIDE_LOAD1]], <32 x i8> zeroinitializer
+; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[TMP3]], [[TMP7]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP9]], <32 x i8> [[WIDE_LOAD1]]
; FIXED-NEXT: store <32 x i8> [[PREDPHI2]], ptr [[TMP1]], align 1
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
index 8cd540c3888db..9d4274a559f5f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
@@ -23,7 +23,8 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
+; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> zeroinitializer, i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[TMP4]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
@@ -214,7 +215,8 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
+; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> zeroinitializer, i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[TMP4]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64