[llvm] [InstCombine] Preserve the nsw flag for (X | Op01C) + Op1C --> X + (Op01C + Op1C) (PR #94586)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 6 20:47:47 PDT 2024
https://github.com/csstormq updated https://github.com/llvm/llvm-project/pull/94586
>From 170cfa2c576a3c568037cb2901615b4608515aa8 Mon Sep 17 00:00:00 2001
From: csstormq <swust_xiaoqiangxu at 163.com>
Date: Thu, 6 Jun 2024 12:04:50 +0800
Subject: [PATCH 1/3] [InstCombine] Preserve the nsw flag for (X | Op01C) +
Op1C --> X + (Op01C + Op1C)
---
.../InstCombine/InstCombineAddSub.cpp | 13 +++++++++++--
.../InstCombine/sadd-with-overflow.ll | 2 +-
.../Transforms/InstCombine/sdiv-simplify.ll | 15 +++++++++++++++
.../AArch64/matrix-extract-insert.ll | 18 +++++++++---------
4 files changed, 36 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/sdiv-simplify.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 8205b49dfbe2f..b2c1cfcd1148c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -905,8 +905,17 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add`
Constant *Op01C;
- if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C))))
- return BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));
+ if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) {
+ bool HasNSW = Add.hasNoSignedWrap();
+ BinaryOperator *NewAdd =
+ BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));
+ // Preserve the nsw flag so that there is a chance to make some other
+ // transformations.
+ // For some cases, sdiv can be converted to udiv when the newly created add
+ // carrying the nsw flag is one of its operands.
+ NewAdd->setHasNoSignedWrap(HasNSW);
+ return NewAdd;
+ }
// (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
const APInt *C2;
diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
index 729ca03ddfd15..e4dd2d10637d3 100644
--- a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
+++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
@@ -125,7 +125,7 @@ define { i32, i1 } @fold_sub_simple(i32 %x) {
define { i32, i1 } @fold_with_distjoin_or(i32 %x) {
; CHECK-LABEL: @fold_with_distjoin_or(
-; CHECK-NEXT: [[B:%.*]] = add i32 [[X:%.*]], 6
+; CHECK-NEXT: [[B:%.*]] = add nsw i32 [[X:%.*]], 6
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[B]], 0
; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
;
diff --git a/llvm/test/Transforms/InstCombine/sdiv-simplify.ll b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll
new file mode 100644
index 0000000000000..91d648e9093a7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: @sdiv_to_udiv(
+; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0:%.*]], 8
+; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0:%.*]], 6242049
+; CHECK-NEXT: [[T3:%.*]] = udiv i32 [[T2]], 192
+; CHECK-NEXT: ret i32 [[T3]]
+;
+ %t0 = shl nuw nsw i32 %arg0, 8
+ %t1 = or disjoint i32 %t0, 1
+ %t2 = add nuw nsw i32 %t1, 6242048
+ %t3 = sdiv i32 %t2, 192
+ ret i32 %t3
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
index 5cbf50e06fbe8..c4cd2379a7be2 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -182,11 +182,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
; CHECK: vector.body.1:
; CHECK-NEXT: [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ]
; CHECK-NEXT: [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15
-; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[INDEX_1]], 16
+; CHECK-NEXT: [[TMP34:%.*]] = add nsw i64 [[INDEX_1]], 16
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i64> [[TMP35]], i64 [[TMP34]], i64 1
-; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[INDEX_1]], 17
-; CHECK-NEXT: [[TMP38:%.*]] = add i64 [[INDEX_1]], 18
+; CHECK-NEXT: [[TMP37:%.*]] = add nsw i64 [[INDEX_1]], 17
+; CHECK-NEXT: [[TMP38:%.*]] = add nsw i64 [[INDEX_1]], 18
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1
; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], <i64 225, i64 225>
@@ -259,11 +259,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
; CHECK: vector.body.2:
; CHECK-NEXT: [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ]
; CHECK-NEXT: [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30
-; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX_2]], 31
+; CHECK-NEXT: [[TMP65:%.*]] = add nsw i64 [[INDEX_2]], 31
; CHECK-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1
-; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[INDEX_2]], 32
-; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[INDEX_2]], 33
+; CHECK-NEXT: [[TMP68:%.*]] = add nsw i64 [[INDEX_2]], 32
+; CHECK-NEXT: [[TMP69:%.*]] = add nsw i64 [[INDEX_2]], 33
; CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0
; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1
; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], <i64 225, i64 225>
@@ -336,11 +336,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
; CHECK: vector.body.3:
; CHECK-NEXT: [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ]
; CHECK-NEXT: [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45
-; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[INDEX_3]], 46
+; CHECK-NEXT: [[TMP96:%.*]] = add nsw i64 [[INDEX_3]], 46
; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0
; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1
-; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[INDEX_3]], 47
-; CHECK-NEXT: [[TMP100:%.*]] = add i64 [[INDEX_3]], 48
+; CHECK-NEXT: [[TMP99:%.*]] = add nsw i64 [[INDEX_3]], 47
+; CHECK-NEXT: [[TMP100:%.*]] = add nsw i64 [[INDEX_3]], 48
; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0
; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1
; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], <i64 225, i64 225>
>From 8b6eca178655285d688fcaae3f496b0bf700beac Mon Sep 17 00:00:00 2001
From: csstormq <swust_xiaoqiangxu at 163.com>
Date: Thu, 6 Jun 2024 17:03:08 +0800
Subject: [PATCH 2/3] update test case using the update_test_checks.py script
---
llvm/test/Transforms/InstCombine/sdiv-simplify.ll | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/sdiv-simplify.ll b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll
index 91d648e9093a7..cef999a398caf 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-simplify.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll
@@ -1,9 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) {
-; CHECK-LABEL: @sdiv_to_udiv(
-; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0:%.*]], 8
-; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0:%.*]], 6242049
+; CHECK-LABEL: define i32 @sdiv_to_udiv(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0]], 8
+; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0]], 6242049
; CHECK-NEXT: [[T3:%.*]] = udiv i32 [[T2]], 192
; CHECK-NEXT: ret i32 [[T3]]
;
>From 3cf7a70ee6a0cf9cb40e5a762be272575196529c Mon Sep 17 00:00:00 2001
From: csstormq <swust_xiaoqiangxu at 163.com>
Date: Fri, 7 Jun 2024 11:47:06 +0800
Subject: [PATCH 3/3] fixup! Preserve the nsw flag iff the sum of Op01C and
Op1C will not overflow
---
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index b2c1cfcd1148c..9b86c4cefac06 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -906,14 +906,14 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add`
Constant *Op01C;
if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) {
- bool HasNSW = Add.hasNoSignedWrap();
BinaryOperator *NewAdd =
BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));
- // Preserve the nsw flag so that there is a chance to make some other
- // transformations.
+ // Preserve the nsw flag iff the sum of Op01C and Op1C will not overflow
+ // so that there is a chance to make some other transformations.
// For some cases, sdiv can be converted to udiv when the newly created add
// carrying the nsw flag is one of its operands.
- NewAdd->setHasNoSignedWrap(HasNSW);
+ if (willNotOverflowSignedAdd(Op01C, Op1C, Add))
+ NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap());
return NewAdd;
}
More information about the llvm-commits
mailing list