[llvm] [LV] Add extra check for signed overflow for SDiv/SRem (PR #170818)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 01:31:03 PST 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/170818
From 3d6c7f05c3c8cd1a2339b4aab04403e9ba7c50e5 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 5 Dec 2025 00:53:15 -0800
Subject: [PATCH] [LV] Add extra check for signed overflow for SDiv/SRem
---
.../Transforms/Vectorize/LoopVectorize.cpp | 32 +++++++++++++++++--
.../LoopVectorize/AArch64/invalid-costs.ll | 3 +-
.../Transforms/LoopVectorize/RISCV/divrem.ll | 6 ++--
.../LoopVectorize/RISCV/tail-folding-div.ll | 6 ++--
4 files changed, 40 insertions(+), 7 deletions(-)
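For context before the diff: LLVM IR's sdiv and srem, like signed / and % in C++, are undefined not only for a zero divisor but also for the one overflowing operand pair, INT_MIN / -1, and that second case is what this patch keeps masked-off lanes away from. A minimal standalone sketch of the hazard (editor's illustration, not part of the patch; sdivIsUB is a hypothetical helper):

    #include <cassert>
    #include <climits>

    // Hypothetical predicate: the complete set of operand pairs for which a
    // signed division or remainder is undefined behavior.
    bool sdivIsUB(int lhs, int rhs) {
      return rhs == 0 ||                    // division by zero
             (lhs == INT_MIN && rhs == -1); // INT_MAX + 1 is unrepresentable
    }

    int main() {
      assert(sdivIsUB(42, 0));
      assert(sdivIsUB(INT_MIN, -1)); // the case the patch newly guards
      assert(!sdivIsUB(INT_MIN, 1));
      assert(!sdivIsUB(-1, -1));
      return 0;
    }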
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f78acf31a0de2..c4844496ebea7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2856,6 +2856,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// having at least one active lane (the first). If the side-effects of the
// instruction are invariant, executing it w/o (the tail-folding) mask is safe
// - it will cause the same side-effects as when masked.
+  // SDiv/SRem can also overflow (INT_MIN / -1), which is only possible when
+  // the dividend varies and the divisor varies too or is the constant -1.
+  auto MayOverflow = [this](Instruction *I) {
+    Value *LHS = I->getOperand(0);
+    Value *RHS = I->getOperand(1);
+    return !Legal->isInvariant(LHS) &&
+           (!Legal->isInvariant(RHS) || match(RHS, m_AllOnes()));
+  };
switch(I->getOpcode()) {
default:
llvm_unreachable(
@@ -2878,11 +2886,13 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
case Instruction::URem:
// If the divisor is loop-invariant no predication is needed.
return !Legal->isInvariant(I->getOperand(1));
+ case Instruction::SDiv:
+ case Instruction::SRem:
+    // Signed ops can additionally overflow (INT_MIN / -1); check the operands.
+ return !Legal->isInvariant(I->getOperand(1)) || MayOverflow(I);
}
}
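Restating the new predicate outside LLVM: the extra predication is only needed when the dividend can vary while the divisor either varies as well or is literally the constant -1. An invariant dividend adds no new hazard here, since a variant divisor already forces predication on its own, and a fully invariant division is one the scalar loop executes anyway. A small mirror of the logic (editor's sketch; the flag names are hypothetical stand-ins for Legal->isInvariant() and the ConstantInt -1 test):

    #include <cassert>

    // Hypothetical mirror of the MayOverflow lambda above.
    bool mayOverflow(bool LHSInvariant, bool RHSInvariant, bool RHSIsMinusOne) {
      return !LHSInvariant && (!RHSInvariant || RHSIsMinusOne);
    }

    int main() {
      assert(!mayOverflow(true, false, false)); // invariant dividend: safe
      assert(mayOverflow(false, false, false)); // both vary: INT_MIN / -1 possible
      assert(mayOverflow(false, true, true));   // divisor is the constant -1
      assert(!mayOverflow(false, true, false)); // invariant divisor other than -1
      return 0;
    }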
@@ -7866,12 +7876,30 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
// If not provably safe, use a select to form a safe divisor before widening the
// div/rem operation itself. Otherwise fall through to general handling below.
if (CM.isPredicatedInst(I)) {
+    // Mirrors the signed-overflow check in isPredicatedInst: a variant
+    // dividend whose divisor varies too or is the constant -1.
+    auto MayOverflow = [&](Instruction *I) {
+      Value *LHS = I->getOperand(0);
+      Value *RHS = I->getOperand(1);
+      return !Legal->isInvariant(LHS) &&
+             (!Legal->isInvariant(RHS) || match(RHS, m_AllOnes()));
+    };
SmallVector<VPValue *> Ops(VPI->operands());
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
VPValue *One = Plan.getConstantInt(I->getType(), 1u);
auto *SafeRHS =
Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc());
Ops[1] = SafeRHS;
+    // For signed ops, also guard against INT_MIN / -1 overflow by selecting
+    // a zero dividend on masked-off lanes (0 / 1 is harmless).
+    if ((VPI->getOpcode() == Instruction::SDiv ||
+         VPI->getOpcode() == Instruction::SRem) &&
+        MayOverflow(I)) {
+      VPValue *Zero = Plan.getConstantInt(I->getType(), 0);
+      auto *SafeLHS =
+          Builder.createSelect(Mask, Ops[0], Zero, VPI->getDebugLoc());
+      Ops[0] = SafeLHS;
+    }
return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc());
}
[[fallthrough]];
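Modeled on a single scalar lane, the recipe change amounts to the following (editor's sketch; maskedSDivLane is hypothetical, and the real code adds the dividend select only when MayOverflow holds, a detail omitted here):

    #include <cassert>
    #include <climits>

    // Hypothetical model of one lane of the widened, masked sdiv: an inactive
    // lane computes 0 / 1 == 0, so neither division by zero nor INT_MIN / -1
    // can occur, and the blend discards its result anyway.
    int maskedSDivLane(bool laneActive, int lhs, int rhs) {
      int safeRhs = laneActive ? rhs : 1; // the existing safe-divisor select
      int safeLhs = laneActive ? lhs : 0; // the new safe-dividend select
      return safeLhs / safeRhs;
    }

    int main() {
      assert(maskedSDivLane(false, INT_MIN, -1) == 0); // masked off: no overflow
      assert(maskedSDivLane(true, -12, 3) == -4);      // active lanes unchanged
      return 0;
    }

In the EVL-based RISC-V tests below, the same dividend select shows up as a llvm.vp.merge with a zeroinitializer fallback rather than a plain select.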
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
index cc3b1c9c9db8a..2d2fdbaa3170b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
@@ -32,7 +32,8 @@ define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noal
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP10:%.*]] = sext <vscale x 4 x i32> [[TMP9]] to <vscale x 4 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i64> [[TMP7]], <vscale x 4 x i64> splat (i64 1)
-; CHECK-NEXT: [[TMP12:%.*]] = sdiv <vscale x 4 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i64> [[TMP10]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sdiv <vscale x 4 x i64> [[TMP16]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = trunc <vscale x 4 x i64> [[TMP12]] to <vscale x 4 x i32>
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index e4ba6fe9d757d..de235a85349d7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -575,7 +575,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 -128)
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> splat (i8 -1), <vscale x 16 x i8> splat (i8 1), i32 [[TMP12]])
-; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], [[TMP10]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[WIDE_LOAD]], <vscale x 16 x i8> zeroinitializer, i32 [[TMP12]])
+; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[TMP4]], [[TMP10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[WIDE_LOAD]]
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[PREDPHI]], ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
@@ -599,7 +600,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128)
; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1)
-; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]]
+; FIXED-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[WIDE_LOAD1]], <32 x i8> zeroinitializer
+; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[TMP3]], [[TMP7]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP9]], <32 x i8> [[WIDE_LOAD1]]
; FIXED-NEXT: store <32 x i8> [[PREDPHI2]], ptr [[TMP1]], align 1
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
index 8cd540c3888db..9d4274a559f5f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
@@ -23,7 +23,8 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
+; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> zeroinitializer, i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[TMP4]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
@@ -214,7 +215,8 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
+; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> zeroinitializer, i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[TMP4]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64