[llvm] [InstSimplify] Infer icmp from with.overflow intrinsics (PR #75511)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 10:15:53 PST 2023
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/75511
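This patch tries to simplify the pattern `Overflow | icmp pred Res, C` into `Overflow` or `true`, where `Overflow` and `Res` are the two return values of a call to a `with.overflow` intrinsic. The result is `Overflow` when the comparison can never hold for a non-overflowing result, and `true` when it always holds for a non-overflowing result.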
Alive2: https://alive2.llvm.org/ce/z/4-LEV2
Fixes #75360.
>From 1a47a8f76fd7defef5aea2d4ddb213c4883f0047 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 15 Dec 2023 01:50:41 +0800
Subject: [PATCH 1/2] [InstSimplify] Add pre-commit tests for PR75360. NFC.
---
.../InstCombine/sadd-with-overflow.ll | 92 +++++++++++++++++++
1 file changed, 92 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
index 4b37ccbe3370b6..87663a4f1349b9 100644
--- a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
+++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
@@ -122,3 +122,95 @@ define { i32, i1 } @fold_sub_simple(i32 %x) {
%b = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 30)
ret { i32, i1 } %b
}
+
+; Tests from PR75360
+define i1 @ckd_add_unsigned(i31 %num) {
+; CHECK-LABEL: @ckd_add_unsigned(
+; CHECK-NEXT: [[A0:%.*]] = zext i31 [[NUM:%.*]] to i32
+; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0]], i32 1)
+; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
+; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
+; CHECK-NEXT: [[A4:%.*]] = icmp slt i32 [[A3]], 0
+; CHECK-NEXT: [[A5:%.*]] = or i1 [[A2]], [[A4]]
+; CHECK-NEXT: ret i1 [[A5]]
+;
+ %a0 = zext i31 %num to i32
+ %a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
+ %a2 = extractvalue { i32, i1 } %a1, 1
+ %a3 = extractvalue { i32, i1 } %a1, 0
+ %a4 = icmp slt i32 %a3, 0
+ %a5 = or i1 %a2, %a4
+ ret i1 %a5
+}
+
+define i1 @ckd_add_unsigned_commuted(i31 %num) {
+; CHECK-LABEL: @ckd_add_unsigned_commuted(
+; CHECK-NEXT: [[A0:%.*]] = zext i31 [[NUM:%.*]] to i32
+; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0]], i32 1)
+; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
+; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
+; CHECK-NEXT: [[A4:%.*]] = icmp slt i32 [[A3]], 0
+; CHECK-NEXT: [[A5:%.*]] = or i1 [[A4]], [[A2]]
+; CHECK-NEXT: ret i1 [[A5]]
+;
+ %a0 = zext i31 %num to i32
+ %a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
+ %a2 = extractvalue { i32, i1 } %a1, 1
+ %a3 = extractvalue { i32, i1 } %a1, 0
+ %a4 = icmp slt i32 %a3, 0
+ %a5 = or i1 %a4, %a2
+ ret i1 %a5
+}
+
+define i1 @ckd_add_unsigned_imply_true(i31 %num) {
+; CHECK-LABEL: @ckd_add_unsigned_imply_true(
+; CHECK-NEXT: [[A0:%.*]] = zext i31 [[NUM:%.*]] to i32
+; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0]], i32 1)
+; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
+; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
+; CHECK-NEXT: [[A4:%.*]] = icmp sgt i32 [[A3]], -1
+; CHECK-NEXT: [[A5:%.*]] = or i1 [[A2]], [[A4]]
+; CHECK-NEXT: ret i1 [[A5]]
+;
+ %a0 = zext i31 %num to i32
+ %a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
+ %a2 = extractvalue { i32, i1 } %a1, 1
+ %a3 = extractvalue { i32, i1 } %a1, 0
+ %a4 = icmp sgt i32 %a3, -1
+ %a5 = or i1 %a2, %a4
+ ret i1 %a5
+}
+
+define i1 @ckd_add_unsigned_fail1(i32 %a0) {
+; CHECK-LABEL: @ckd_add_unsigned_fail1(
+; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0:%.*]], i32 1)
+; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
+; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
+; CHECK-NEXT: [[A4:%.*]] = icmp slt i32 [[A3]], 0
+; CHECK-NEXT: [[A5:%.*]] = or i1 [[A2]], [[A4]]
+; CHECK-NEXT: ret i1 [[A5]]
+;
+ %a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
+ %a2 = extractvalue { i32, i1 } %a1, 1
+ %a3 = extractvalue { i32, i1 } %a1, 0
+ %a4 = icmp slt i32 %a3, 0
+ %a5 = or i1 %a2, %a4
+ ret i1 %a5
+}
+
+define i1 @ckd_add_unsigned_fail2(i32 %a0) {
+; CHECK-LABEL: @ckd_add_unsigned_fail2(
+; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0:%.*]], i32 1)
+; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
+; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
+; CHECK-NEXT: [[A4:%.*]] = icmp slt i32 [[A3]], -1
+; CHECK-NEXT: [[A5:%.*]] = or i1 [[A2]], [[A4]]
+; CHECK-NEXT: ret i1 [[A5]]
+;
+ %a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
+ %a2 = extractvalue { i32, i1 } %a1, 1
+ %a3 = extractvalue { i32, i1 } %a1, 0
+ %a4 = icmp slt i32 %a3, -1
+ %a5 = or i1 %a2, %a4
+ ret i1 %a5
+}
>From abb11191073c191399eddec4b48d2eb9403158d4 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 15 Dec 2023 01:53:27 +0800
Subject: [PATCH 2/2] [InstSimplify] Infer icmp from with.overflow intrinsics
---
llvm/include/llvm/Analysis/ValueTracking.h | 5 +++
llvm/lib/Analysis/InstructionSimplify.cpp | 35 ++++++++++++++++++
llvm/lib/Analysis/ValueTracking.cpp | 8 ++---
.../InstCombine/sadd-with-overflow.ll | 36 ++++++-------------
4 files changed, 55 insertions(+), 29 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index a3186e61b94adf..baa16306ebf5df 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -863,6 +863,11 @@ ConstantRange computeConstantRange(const Value *V, bool ForSigned,
const DominatorTree *DT = nullptr,
unsigned Depth = 0);
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+ConstantRange
+computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+ bool ForSigned, const SimplifyQuery &SQ);
+
/// Return true if this function can prove that the instruction I will
/// always transfer execution to one of its successors (including the next
/// instruction that follows within a basic block). E.g. this is not
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 2a45acf63aa2ca..fa2e42a4c22e60 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2313,6 +2313,36 @@ static Value *simplifyOrLogic(Value *X, Value *Y) {
return nullptr;
}
+/// Res, Overflow = xxx_with_overflow X, Y
+/// Try to simplify the pattern "Overflow | icmp pred Res, C".
+static Value *simplifyOrOfICmpAndWithOverflow(Value *Op0, Value *Op1,
+ const SimplifyQuery &SQ) {
+ const WithOverflowInst *WO;
+ const APInt *C;
+ ICmpInst::Predicate Pred;
+ if (!match(Op0, m_ExtractValue<1>(m_WithOverflowInst(WO))) ||
+ !match(Op1, m_ICmp(Pred, m_ExtractValue<0>(m_Specific(WO)), m_APInt(C))))
+ return nullptr;
+
+ // See if we can infer the result of icmp from the nowrap flag.
+ const auto LHS_CR = llvm::computeConstantRangeIncludingKnownBits(
+ WO->getLHS(), ICmpInst::isSigned(Pred), SQ);
+ const auto RHS_CR = llvm::computeConstantRangeIncludingKnownBits(
+ WO->getRHS(), ICmpInst::isSigned(Pred), SQ);
+ const auto DomCR = LHS_CR.overflowingBinaryOp(WO->getBinaryOp(), RHS_CR,
+ WO->getNoWrapKind());
+ const auto CR = llvm::ConstantRange::makeExactICmpRegion(Pred, *C);
+
+ ConstantRange Intersection = DomCR.intersectWith(CR);
+ ConstantRange Difference = DomCR.difference(CR);
+ if (Intersection.isEmptySet())
+ return Op0;
+ if (Difference.isEmptySet())
+ return ConstantInt::getTrue(Op0->getType());
+
+ return nullptr;
+}
+
/// Given operands for an Or, see if we can fold the result.
/// If not, this returns null.
static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -2480,6 +2510,11 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (*Implied == true)
return ConstantInt::getTrue(Op1->getType());
}
+
+ if (auto *V = simplifyOrOfICmpAndWithOverflow(Op0, Op1, Q))
+ return V;
+ if (auto *V = simplifyOrOfICmpAndWithOverflow(Op1, Op0, Q))
+ return V;
}
if (Value *V = simplifyByDomEq(Instruction::Or, Op0, Op1, Q, MaxRecurse))
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5445746ab2a1bc..e5469a6d659090 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6228,10 +6228,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
}
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
-static ConstantRange
-computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
- bool ForSigned,
- const SimplifyQuery &SQ) {
+ConstantRange
+llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+ bool ForSigned,
+ const SimplifyQuery &SQ) {
ConstantRange CR1 =
ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
index 87663a4f1349b9..a784028ec3a245 100644
--- a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
+++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
@@ -126,13 +126,8 @@ define { i32, i1 } @fold_sub_simple(i32 %x) {
; Tests from PR75360
define i1 @ckd_add_unsigned(i31 %num) {
; CHECK-LABEL: @ckd_add_unsigned(
-; CHECK-NEXT: [[A0:%.*]] = zext i31 [[NUM:%.*]] to i32
-; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0]], i32 1)
-; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
-; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
-; CHECK-NEXT: [[A4:%.*]] = icmp slt i32 [[A3]], 0
-; CHECK-NEXT: [[A5:%.*]] = or i1 [[A2]], [[A4]]
-; CHECK-NEXT: ret i1 [[A5]]
+; CHECK-NEXT: [[A2:%.*]] = icmp eq i31 [[NUM:%.*]], -1
+; CHECK-NEXT: ret i1 [[A2]]
;
%a0 = zext i31 %num to i32
%a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
@@ -145,13 +140,8 @@ define i1 @ckd_add_unsigned(i31 %num) {
define i1 @ckd_add_unsigned_commuted(i31 %num) {
; CHECK-LABEL: @ckd_add_unsigned_commuted(
-; CHECK-NEXT: [[A0:%.*]] = zext i31 [[NUM:%.*]] to i32
-; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0]], i32 1)
-; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
-; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
-; CHECK-NEXT: [[A4:%.*]] = icmp slt i32 [[A3]], 0
-; CHECK-NEXT: [[A5:%.*]] = or i1 [[A4]], [[A2]]
-; CHECK-NEXT: ret i1 [[A5]]
+; CHECK-NEXT: [[A2:%.*]] = icmp eq i31 [[NUM:%.*]], -1
+; CHECK-NEXT: ret i1 [[A2]]
;
%a0 = zext i31 %num to i32
%a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
@@ -164,13 +154,7 @@ define i1 @ckd_add_unsigned_commuted(i31 %num) {
define i1 @ckd_add_unsigned_imply_true(i31 %num) {
; CHECK-LABEL: @ckd_add_unsigned_imply_true(
-; CHECK-NEXT: [[A0:%.*]] = zext i31 [[NUM:%.*]] to i32
-; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0]], i32 1)
-; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
-; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
-; CHECK-NEXT: [[A4:%.*]] = icmp sgt i32 [[A3]], -1
-; CHECK-NEXT: [[A5:%.*]] = or i1 [[A2]], [[A4]]
-; CHECK-NEXT: ret i1 [[A5]]
+; CHECK-NEXT: ret i1 true
;
%a0 = zext i31 %num to i32
%a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
@@ -198,19 +182,21 @@ define i1 @ckd_add_unsigned_fail1(i32 %a0) {
ret i1 %a5
}
-define i1 @ckd_add_unsigned_fail2(i32 %a0) {
+define i1 @ckd_add_unsigned_fail2(i31 %num) {
; CHECK-LABEL: @ckd_add_unsigned_fail2(
-; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0:%.*]], i32 1)
+; CHECK-NEXT: [[A0:%.*]] = zext i31 [[NUM:%.*]] to i32
+; CHECK-NEXT: [[A1:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0]], i32 1)
; CHECK-NEXT: [[A2:%.*]] = extractvalue { i32, i1 } [[A1]], 1
; CHECK-NEXT: [[A3:%.*]] = extractvalue { i32, i1 } [[A1]], 0
-; CHECK-NEXT: [[A4:%.*]] = icmp slt i32 [[A3]], -1
+; CHECK-NEXT: [[A4:%.*]] = icmp slt i32 [[A3]], 2
; CHECK-NEXT: [[A5:%.*]] = or i1 [[A2]], [[A4]]
; CHECK-NEXT: ret i1 [[A5]]
;
+ %a0 = zext i31 %num to i32
%a1 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a0, i32 1)
%a2 = extractvalue { i32, i1 } %a1, 1
%a3 = extractvalue { i32, i1 } %a1, 0
- %a4 = icmp slt i32 %a3, -1
+ %a4 = icmp slt i32 %a3, 2
%a5 = or i1 %a2, %a4
ret i1 %a5
}
More information about the llvm-commits mailing list