[llvm] [InstCombine] Optimize sub(sext(add(x, y)), sext(add(x, z))). (PR #144174)
Slava Zakharin via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 15 17:37:19 PDT 2025
https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/144174
>From 55f236af1e9915d8e83ef30e423ee2e3818bd594 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 13 Jun 2025 17:25:24 -0700
Subject: [PATCH 1/4] [InstCombine] Optimize
sub(sext(add(x,y)),sext(add(x,z))).
This pattern is often encountered in Flang-generated LLVM IR,
for example, for the counts of the loops generated for array
expressions like: `a(x:x+y)` or `a(x+z:x+y)` or their variations.
In order to compute the loop count, Flang needs to subtract
the lower bound of the array slice from the upper bound
of the array slice. To avoid signed wraparound, it sign-extends
the original values (that may be of any user data type)
to `i64`.
This peephole is really helpful in CPU2017/548.exchange2,
where we have multiple statements like the following:
```
block(row+1:row+2, 7:9, i7) = block(row+1:row+2, 7:9, i7) - 10
```
While this is just a 2x3-iteration loop nest, LLVM cannot
figure that out and ends up vectorizing the inner loop aggressively
(with a vector epilogue and a scalar remainder). This, in turn,
causes problems for LSR, which ends up creating too many loop-carried
values in the loop containing the above statement; these values then
cause too many spills/reloads.
Alive2: https://alive2.llvm.org/ce/z/gLgfYX
Related to #143219.
---
.../InstCombine/InstCombineAddSub.cpp | 56 ++++++++++++
.../Transforms/InstCombine/sub-sext-add.ll | 89 +++++++++++++++++++
2 files changed, 145 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/sub-sext-add.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c1ce364eb1794..35de76d50672d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2807,6 +2807,62 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
return Res;
+ // (sub[ nsw][ nuw] (sext (add nsw (X, Y)), sext (X))) --> (sext (Y))
+ {
+ Value *Add0;
+ if (match(Op0, m_SExt(m_Value(Add0))) &&
+ match(Add0, m_Add(m_Value(X), m_Value(Y))) &&
+ match(Op1, m_SExt(m_Specific(X)))) {
+ auto *OBO0 = cast<OverflowingBinaryOperator>(Add0);
+ if (OBO0->hasNoSignedWrap()) {
+ // Non-constant Y requires new SExt.
+ unsigned numOfNewInstrs = !isa<Constant>(Y) ? 1 : 0;
+ // Check if we can trade some of the old instructions for the new ones.
+ unsigned numOfDeadInstrs = 0;
+ numOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
+ numOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
+ numOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
+ if (numOfDeadInstrs >= numOfNewInstrs) {
+ Value *SExtY = Builder.CreateSExt(Y, I.getType());
+ return replaceInstUsesWith(I, SExtY);
+ }
+ }
+ }
+ }
+
+ // (sub[ nsw] (sext (add nsw (X, Y)), sext (add nsw (X, Z)))) -->
+ // --> (sub[ nsw] (sext (Y), sext(Z)))
+ {
+ Value *Z, *Add0, *Add1;
+ if (match(Op0, m_SExt(m_Value(Add0))) &&
+ match(Add0, m_Add(m_Value(X), m_Value(Y))) &&
+ match(Op1, m_SExt(m_Value(Add1))) &&
+ match(Add1, m_Add(m_Specific(X), m_Value(Z)))) {
+ auto *OBO0 = cast<OverflowingBinaryOperator>(Add0);
+ auto *OBO1 = cast<OverflowingBinaryOperator>(Add1);
+ if (OBO0->hasNoSignedWrap() && OBO1->hasNoSignedWrap()) {
+ unsigned numOfNewInstrs = 0;
+ // Non-constant Y, Z require new SExt.
+ numOfNewInstrs += !isa<Constant>(Y) ? 1 : 0;
+ numOfNewInstrs += !isa<Constant>(Z) ? 1 : 0;
+ // Check if we can trade some of the old instructions for the new ones.
+ unsigned numOfDeadInstrs = 0;
+ numOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
+ numOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
+ numOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
+ numOfDeadInstrs += Add1->hasOneUse() ? 1 : 0;
+ if (numOfDeadInstrs >= numOfNewInstrs) {
+ Value *SExtY = Builder.CreateSExt(Y, I.getType());
+ Value *SExtZ = Builder.CreateSExt(Z, I.getType());
+ Value *Sub = Builder.CreateSub(SExtY, SExtZ, "",
+ /* HasNUW */ false,
+ /* HasNSW */ I.hasNoSignedWrap());
+ return replaceInstUsesWith(I, Sub);
+ }
+ }
+ }
+ }
+
return TryToNarrowDeduceFlags();
}
diff --git a/llvm/test/Transforms/InstCombine/sub-sext-add.ll b/llvm/test/Transforms/InstCombine/sub-sext-add.ll
new file mode 100644
index 0000000000000..8b12acdf95ba5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sub-sext-add.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i64 @src_2add_2sext_sub(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: define i64 @src_2add_2sext_sub(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
+; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y]] to i64
+; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[Z]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %add2 = add nsw i32 %x, %z
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %add2 to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+define i64 @src_2add_2sext_sub_nsw(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: define i64 @src_2add_2sext_sub_nsw(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
+; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y]] to i64
+; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[Z]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %add2 = add nsw i32 %x, %z
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %add2 to i64
+ %sub = sub nsw i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+define i64 @src_2add_2sext_sub_nuw(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: define i64 @src_2add_2sext_sub_nuw(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[Y]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Z]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %add2 = add nsw i32 %x, %z
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %add2 to i64
+ %sub = sub nuw i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+define i64 @src_x_add_2sext_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: define i64 @src_x_add_2sext_sub(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %x to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+define i64 @src_x_add_2sext_sub_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: define i64 @src_x_add_2sext_sub_nsw(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %x to i64
+ %sub = sub nsw i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+define i64 @src_x_add_2sext_sub_nuw(i32 %x, i32 %y) {
+; CHECK-LABEL: define i64 @src_x_add_2sext_sub_nuw(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %x to i64
+ %sub = sub nuw i64 %sext1, %sext2
+ ret i64 %sub
+}
>From 435e3f920138dd263adca328d702bb16ec325466 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 13 Jun 2025 19:47:53 -0700
Subject: [PATCH 2/4] Addressed review comments. Added commutative cases.
---
llvm/include/llvm/IR/PatternMatch.h | 8 ++
.../InstCombine/InstCombineAddSub.cpp | 72 +++++++-------
.../Transforms/InstCombine/sub-sext-add.ll | 99 ++++++++++++++-----
3 files changed, 116 insertions(+), 63 deletions(-)
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 2eaa7d0faabc1..1f86cdfd94e17 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1323,6 +1323,14 @@ m_NSWAdd(const LHS &L, const RHS &R) {
R);
}
template <typename LHS, typename RHS>
+inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
+ OverflowingBinaryOperator::NoSignedWrap, true>
+m_c_NSWAdd(const LHS &L, const RHS &R) {
+ return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
+ OverflowingBinaryOperator::NoSignedWrap,
+ true>(L, R);
+}
+template <typename LHS, typename RHS>
inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
OverflowingBinaryOperator::NoSignedWrap>
m_NSWSub(const LHS &L, const RHS &R) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 35de76d50672d..b8c0465c06ddf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2810,22 +2810,18 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
// (sub[ nsw][ nuw] (sext (add nsw (X, Y)), sext (X))) --> (sext (Y))
{
Value *Add0;
- if (match(Op0, m_SExt(m_Value(Add0))) &&
- match(Add0, m_Add(m_Value(X), m_Value(Y))) &&
- match(Op1, m_SExt(m_Specific(X)))) {
- auto *OBO0 = cast<OverflowingBinaryOperator>(Add0);
- if (OBO0->hasNoSignedWrap()) {
- // Non-constant Y requires new SExt.
- unsigned numOfNewInstrs = !isa<Constant>(Y) ? 1 : 0;
- // Check if we can trade some of the old instructions for the new ones.
- unsigned numOfDeadInstrs = 0;
- numOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
- numOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
- numOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
- if (numOfDeadInstrs >= numOfNewInstrs) {
- Value *SExtY = Builder.CreateSExt(Y, I.getType());
- return replaceInstUsesWith(I, SExtY);
- }
+ if (match(Op1, m_SExt(m_Value(X))) && match(Op0, m_SExt(m_Value(Add0))) &&
+ match(Add0, m_c_NSWAdd(m_Specific(X), m_Value(Y)))) {
+ // Non-constant Y requires new SExt.
+ unsigned NumOfNewInstrs = !isa<Constant>(Y) ? 1 : 0;
+ // Check if we can trade some of the old instructions for the new ones.
+ unsigned NumOfDeadInstrs = 0;
+ NumOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
+ NumOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
+ NumOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
+ if (NumOfDeadInstrs >= NumOfNewInstrs) {
+ Value *SExtY = Builder.CreateSExt(Y, I.getType());
+ return replaceInstUsesWith(I, SExtY);
}
}
}
@@ -2835,30 +2831,28 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
{
Value *Z, *Add0, *Add1;
if (match(Op0, m_SExt(m_Value(Add0))) &&
- match(Add0, m_Add(m_Value(X), m_Value(Y))) &&
match(Op1, m_SExt(m_Value(Add1))) &&
- match(Add1, m_Add(m_Specific(X), m_Value(Z)))) {
- auto *OBO0 = cast<OverflowingBinaryOperator>(Add0);
- auto *OBO1 = cast<OverflowingBinaryOperator>(Add1);
- if (OBO0->hasNoSignedWrap() && OBO1->hasNoSignedWrap()) {
- unsigned numOfNewInstrs = 0;
- // Non-constant Y, Z require new SExt.
- numOfNewInstrs += !isa<Constant>(Y) ? 1 : 0;
- numOfNewInstrs += !isa<Constant>(Z) ? 1 : 0;
- // Check if we can trade some of the old instructions for the new ones.
- unsigned numOfDeadInstrs = 0;
- numOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
- numOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
- numOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
- numOfDeadInstrs += Add1->hasOneUse() ? 1 : 0;
- if (numOfDeadInstrs >= numOfNewInstrs) {
- Value *SExtY = Builder.CreateSExt(Y, I.getType());
- Value *SExtZ = Builder.CreateSExt(Z, I.getType());
- Value *Sub = Builder.CreateSub(SExtY, SExtZ, "",
- /* HasNUW */ false,
- /* HasNSW */ I.hasNoSignedWrap());
- return replaceInstUsesWith(I, Sub);
- }
+ ((match(Add0, m_NSWAdd(m_Value(X), m_Value(Y))) &&
+ match(Add1, m_c_NSWAdd(m_Specific(X), m_Value(Z)))) ||
+ (match(Add0, m_NSWAdd(m_Value(Y), m_Value(X))) &&
+ match(Add1, m_c_NSWAdd(m_Specific(X), m_Value(Z)))))) {
+ unsigned NumOfNewInstrs = 0;
+ // Non-constant Y, Z require new SExt.
+ NumOfNewInstrs += !isa<Constant>(Y) ? 1 : 0;
+ NumOfNewInstrs += !isa<Constant>(Z) ? 1 : 0;
+ // Check if we can trade some of the old instructions for the new ones.
+ unsigned NumOfDeadInstrs = 0;
+ NumOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
+ NumOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
+ NumOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
+ NumOfDeadInstrs += Add1->hasOneUse() ? 1 : 0;
+ if (NumOfDeadInstrs >= NumOfNewInstrs) {
+ Value *SExtY = Builder.CreateSExt(Y, I.getType());
+ Value *SExtZ = Builder.CreateSExt(Z, I.getType());
+ Value *Sub = Builder.CreateSub(SExtY, SExtZ, "",
+ /* HasNUW */ false,
+ /* HasNSW */ I.hasNoSignedWrap());
+ return replaceInstUsesWith(I, Sub);
}
}
}
diff --git a/llvm/test/Transforms/InstCombine/sub-sext-add.ll b/llvm/test/Transforms/InstCombine/sub-sext-add.ll
index 8b12acdf95ba5..fc693c428ab61 100644
--- a/llvm/test/Transforms/InstCombine/sub-sext-add.ll
+++ b/llvm/test/Transforms/InstCombine/sub-sext-add.ll
@@ -1,12 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-define i64 @src_2add_2sext_sub(i32 %x, i32 %y, i32 %z) {
-; CHECK-LABEL: define i64 @src_2add_2sext_sub(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
-; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y]] to i64
-; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[Z]] to i64
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
+define i64 @src_2add_2sext_sub_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @src_2add_2sext_sub_1(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Z:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i64 [[SUB]]
;
%add1 = add nsw i32 %x, %y
@@ -17,11 +16,55 @@ define i64 @src_2add_2sext_sub(i32 %x, i32 %y, i32 %z) {
ret i64 %sub
}
+define i64 @src_2add_2sext_sub_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @src_2add_2sext_sub_2(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Z:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %add2 = add nsw i32 %z, %x
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %add2 to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+define i64 @src_2add_2sext_sub_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @src_2add_2sext_sub_3(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Z:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %y, %x
+ %add2 = add nsw i32 %z, %x
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %add2 to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+define i64 @src_2add_2sext_sub_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @src_2add_2sext_sub_4(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Z:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %y, %x
+ %add2 = add nsw i32 %x, %z
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %add2 to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
define i64 @src_2add_2sext_sub_nsw(i32 %x, i32 %y, i32 %z) {
-; CHECK-LABEL: define i64 @src_2add_2sext_sub_nsw(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
-; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y]] to i64
-; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[Z]] to i64
+; CHECK-LABEL: @src_2add_2sext_sub_nsw(
+; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[Z:%.*]] to i64
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
; CHECK-NEXT: ret i64 [[SUB]]
;
@@ -34,10 +77,9 @@ define i64 @src_2add_2sext_sub_nsw(i32 %x, i32 %y, i32 %z) {
}
define i64 @src_2add_2sext_sub_nuw(i32 %x, i32 %y, i32 %z) {
-; CHECK-LABEL: define i64 @src_2add_2sext_sub_nuw(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[Y]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Z]] to i64
+; CHECK-LABEL: @src_2add_2sext_sub_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Z:%.*]] to i64
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i64 [[SUB]]
;
@@ -49,10 +91,9 @@ define i64 @src_2add_2sext_sub_nuw(i32 %x, i32 %y, i32 %z) {
ret i64 %sub
}
-define i64 @src_x_add_2sext_sub(i32 %x, i32 %y) {
-; CHECK-LABEL: define i64 @src_x_add_2sext_sub(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
+define i64 @src_x_add_2sext_sub_1(i32 %x, i32 %y) {
+; CHECK-LABEL: @src_x_add_2sext_sub_1(
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y:%.*]] to i64
; CHECK-NEXT: ret i64 [[SUB]]
;
%add1 = add nsw i32 %x, %y
@@ -62,10 +103,21 @@ define i64 @src_x_add_2sext_sub(i32 %x, i32 %y) {
ret i64 %sub
}
+define i64 @src_x_add_2sext_sub_2(i32 %x, i32 %y) {
+; CHECK-LABEL: @src_x_add_2sext_sub_2(
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %y, %x
+ %sext1 = sext i32 %add1 to i64
+ %sext2 = sext i32 %x to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
define i64 @src_x_add_2sext_sub_nsw(i32 %x, i32 %y) {
-; CHECK-LABEL: define i64 @src_x_add_2sext_sub_nsw(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
+; CHECK-LABEL: @src_x_add_2sext_sub_nsw(
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y:%.*]] to i64
; CHECK-NEXT: ret i64 [[SUB]]
;
%add1 = add nsw i32 %x, %y
@@ -76,9 +128,8 @@ define i64 @src_x_add_2sext_sub_nsw(i32 %x, i32 %y) {
}
define i64 @src_x_add_2sext_sub_nuw(i32 %x, i32 %y) {
-; CHECK-LABEL: define i64 @src_x_add_2sext_sub_nuw(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
+; CHECK-LABEL: @src_x_add_2sext_sub_nuw(
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y:%.*]] to i64
; CHECK-NEXT: ret i64 [[SUB]]
;
%add1 = add nsw i32 %x, %y
>From 43902266dd2cdda15757e3359d0b1ba973988f43 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 13 Jun 2025 22:29:22 -0700
Subject: [PATCH 3/4] Fixed profitability mistake.
---
.../InstCombine/InstCombineAddSub.cpp | 22 +++++--------------
1 file changed, 5 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index b8c0465c06ddf..8dca5b87ccb3c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2807,23 +2807,11 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
return Res;
- // (sub[ nsw][ nuw] (sext (add nsw (X, Y)), sext (X))) --> (sext (Y))
- {
- Value *Add0;
- if (match(Op1, m_SExt(m_Value(X))) && match(Op0, m_SExt(m_Value(Add0))) &&
- match(Add0, m_c_NSWAdd(m_Specific(X), m_Value(Y)))) {
- // Non-constant Y requires new SExt.
- unsigned NumOfNewInstrs = !isa<Constant>(Y) ? 1 : 0;
- // Check if we can trade some of the old instructions for the new ones.
- unsigned NumOfDeadInstrs = 0;
- NumOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
- NumOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
- NumOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
- if (NumOfDeadInstrs >= NumOfNewInstrs) {
- Value *SExtY = Builder.CreateSExt(Y, I.getType());
- return replaceInstUsesWith(I, SExtY);
- }
- }
+ // (sub (sext (add nsw (X, Y)), sext (X))) --> (sext (Y))
+ if (match(Op1, m_SExt(m_Value(X))) &&
+ match(Op0, m_SExt(m_c_NSWAdd(m_Specific(X), m_Value(Y))))) {
+ Value *SExtY = Builder.CreateSExt(Y, I.getType());
+ return replaceInstUsesWith(I, SExtY);
}
// (sub[ nsw] (sext (add nsw (X, Y)), sext (add nsw (X, Z)))) -->
>From 1f2818242af24148a8c9604cb903cbee155bb21a Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Sun, 15 Jun 2025 17:36:47 -0700
Subject: [PATCH 4/4] Used m_SExtLike matchers.
---
.../InstCombine/InstCombineAddSub.cpp | 8 +++---
.../Transforms/InstCombine/sub-sext-add.ll | 27 +++++++++++++++++++
2 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 8dca5b87ccb3c..fb1273e23e24e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2808,8 +2808,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return Res;
// (sub (sext (add nsw (X, Y)), sext (X))) --> (sext (Y))
- if (match(Op1, m_SExt(m_Value(X))) &&
- match(Op0, m_SExt(m_c_NSWAdd(m_Specific(X), m_Value(Y))))) {
+ if (match(Op1, m_SExtLike(m_Value(X))) &&
+ match(Op0, m_SExtLike(m_c_NSWAdd(m_Specific(X), m_Value(Y))))) {
Value *SExtY = Builder.CreateSExt(Y, I.getType());
return replaceInstUsesWith(I, SExtY);
}
@@ -2818,8 +2818,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
// --> (sub[ nsw] (sext (Y), sext(Z)))
{
Value *Z, *Add0, *Add1;
- if (match(Op0, m_SExt(m_Value(Add0))) &&
- match(Op1, m_SExt(m_Value(Add1))) &&
+ if (match(Op0, m_SExtLike(m_Value(Add0))) &&
+ match(Op1, m_SExtLike(m_Value(Add1))) &&
((match(Add0, m_NSWAdd(m_Value(X), m_Value(Y))) &&
match(Add1, m_c_NSWAdd(m_Specific(X), m_Value(Z)))) ||
(match(Add0, m_NSWAdd(m_Value(Y), m_Value(X))) &&
diff --git a/llvm/test/Transforms/InstCombine/sub-sext-add.ll b/llvm/test/Transforms/InstCombine/sub-sext-add.ll
index fc693c428ab61..e0675f1d80131 100644
--- a/llvm/test/Transforms/InstCombine/sub-sext-add.ll
+++ b/llvm/test/Transforms/InstCombine/sub-sext-add.ll
@@ -61,6 +61,21 @@ define i64 @src_2add_2sext_sub_4(i32 %x, i32 %y, i32 %z) {
ret i64 %sub
}
+define i64 @src_2add_2sextlike_sub(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @src_2add_2sextlike_sub(
+; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[Z:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %add2 = add nsw i32 %x, %z
+ %sext1 = zext nneg i32 %add1 to i64
+ %sext2 = zext nneg i32 %add2 to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
define i64 @src_2add_2sext_sub_nsw(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @src_2add_2sext_sub_nsw(
; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y:%.*]] to i64
@@ -115,6 +130,18 @@ define i64 @src_x_add_2sext_sub_2(i32 %x, i32 %y) {
ret i64 %sub
}
+define i64 @src_x_add_2sextlike_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: @src_x_add_2sextlike_sub(
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add1 = add nsw i32 %x, %y
+ %sext1 = zext nneg i32 %add1 to i64
+ %sext2 = zext nneg i32 %x to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
define i64 @src_x_add_2sext_sub_nsw(i32 %x, i32 %y) {
; CHECK-LABEL: @src_x_add_2sext_sub_nsw(
; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y:%.*]] to i64
More information about the llvm-commits
mailing list