[llvm] [InstCombine] Shrink add's RHS constant based on LHSKnown (PR #111233)

Fri Oct 4 22:49:41 PDT 2024

https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/111233

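If the low bits of the result are not demanded and the corresponding low bits of the LHS are known to be zero, then those low bits of the RHS cannot affect any demanded bit (adding them to zeros produces no carry), so we can further simplify the RHS based on `LHSKnown`. This patch only shrinks the RHS constant, since recursively calling `SimplifyDemandedBits` on the RHS again may be expensive.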


>From 0baa7c643933298e05996abe10056b4840c64871 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 5 Oct 2024 13:29:44 +0800
Subject: [PATCH 1/2] [InstCombine] Add pre-commit tests. NFC.

---
 llvm/test/Transforms/InstCombine/add.ll | 79 +++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 417c3a950d7805..3d335427b3d77b 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -4245,5 +4245,84 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) {
   ret i32 %r
 }
 
+define i64 @shrink_add_rhs_constant1(i64 %x) {
+; CHECK-LABEL: @shrink_add_rhs_constant1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[V30:%.*]], 9223372036854775806
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[AND]], -1
+; CHECK-NEXT:    [[AND1:%.*]] = and i64 [[ADD]], -2
+; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[AND1]], 2
+; CHECK-NEXT:    ret i64 [[ADD2]]
+;
+entry:
+  %and1 = and i64 %x, 9223372036854775806
+  %add1 = add i64 %and1, -1
+  %and2 = and i64 %add1, -2
+  %add2 = add i64 %and2, 2
+  ret i64 %add2
+}
+
+define i32 @shrink_add_rhs_constant2(i32 %x) {
+; CHECK-LABEL: @shrink_add_rhs_constant2(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[N:%.*]], 1
+; CHECK-NEXT:    [[MUL:%.*]] = and i32 [[X]], -4
+; CHECK-NEXT:    [[ADD_I:%.*]] = add nsw i32 [[MUL]], 7
+; CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[ADD_I]], -8
+; CHECK-NEXT:    ret i32 [[AND_I]]
+;
+  %shl = shl i32 %x, 1
+  %mul = and i32 %shl, -4
+  %add = add nsw i32 %mul, 7
+  %and = and i32 %add, -8
+  ret i32 %and
+}
+
+define i64 @shrink_add_rhs_constant3(i64 %x) {
+; CHECK-LABEL: @shrink_add_rhs_constant3(
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], 23
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -8
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], 15
+; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -8
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %add1 = add i64 %x, 23
+  %and1 = and i64 %add1, -8
+  %add2 = add i64 %and1, 15
+  %and2 = and i64 %add2, -8
+  ret i64 %and2
+}
+
+define i64 @shrink_add_rhs_constant4(i64 %x) {
+; CHECK-LABEL: @shrink_add_rhs_constant4(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw i64 [[TMP0:%.*]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP2]], 23
+; CHECK-NEXT:    [[TMP4:%.*]] = and i64 [[TMP5]], -16
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP4]], 8
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %shl = shl nsw i64 %x, 4
+  %add = add i64 %shl, 23
+  %and = and i64 %add, -16
+  %or = or disjoint i64 %and, 8
+  ret i64 %or
+}
+
+define i64 @shrink_add_rhs_constant5(i64 %x) {
+; CHECK-LABEL: @shrink_add_rhs_constant5(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[X:%.*]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP5]], -8
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP4]], -8
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %shl = shl nsw i64 %x, 3
+  %add1 = add i64 %shl, 15
+  %and1 = and i64 %add1, -8
+  %add2 = add i64 %and1, 15
+  %and2 = and i64 %add2, -8
+  ret i64 %and2
+}
+
 declare void @llvm.assume(i1)
 declare void @fake_func(i32)

>From 5eb8bef48941758e56ec2ab7dc5164f88061465e Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 5 Oct 2024 13:43:31 +0800
Subject: [PATCH 2/2] [InstCombine] Shrink add's RHS constant based on LHSKnown

---
 .../InstCombineSimplifyDemanded.cpp           |  8 +++++++
 llvm/test/Transforms/InstCombine/add.ll       | 23 +++++--------------
 .../Transforms/InstCombine/rem-mul-shl.ll     |  6 ++---
 llvm/utils/UpdateTestChecks/common.py         |  2 +-
 4 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 3d4461dc1a87f6..d271931da73c6b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -540,6 +540,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
         SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Depth + 1, Q))
       return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
 
+    unsigned LHSNTZ = (~DemandedMask & LHSKnown.Zero).countr_one();
+    if (LHSNTZ != 0) {
+      APInt DemandedFromRHS = DemandedFromOps;
+      DemandedFromRHS.clearLowBits(LHSNTZ);
+      if (ShrinkDemandedConstant(I, 1, DemandedFromRHS))
+        return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
+    }
+
     // If we are known to be adding zeros to every bit below
     // the highest demanded bit, we just return the other side.
     if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 3d335427b3d77b..5e4f7d418ce800 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -1608,7 +1608,7 @@ define i8 @fold_add_constant_preserve_nuw(i8 %x) {
 define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) {
 ; CHECK-LABEL: @sdiv_to_udiv(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nuw nsw i32 [[ARG0:%.*]], 8
-; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw i32 [[T0]], 6242049
+; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw i32 [[T0]], 6242048
 ; CHECK-NEXT:    [[T3:%.*]] = udiv i32 [[T2]], 192
 ; CHECK-NEXT:    ret i32 [[T3]]
 ;
@@ -4249,10 +4249,7 @@ define i64 @shrink_add_rhs_constant1(i64 %x) {
 ; CHECK-LABEL: @shrink_add_rhs_constant1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[V30:%.*]], 9223372036854775806
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[AND]], -1
-; CHECK-NEXT:    [[AND1:%.*]] = and i64 [[ADD]], -2
-; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[AND1]], 2
-; CHECK-NEXT:    ret i64 [[ADD2]]
+; CHECK-NEXT:    ret i64 [[AND]]
 ;
 entry:
   %and1 = and i64 %x, 9223372036854775806
@@ -4265,8 +4262,7 @@ entry:
 define i32 @shrink_add_rhs_constant2(i32 %x) {
 ; CHECK-LABEL: @shrink_add_rhs_constant2(
 ; CHECK-NEXT:    [[X:%.*]] = shl i32 [[N:%.*]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = and i32 [[X]], -4
-; CHECK-NEXT:    [[ADD_I:%.*]] = add nsw i32 [[MUL]], 7
+; CHECK-NEXT:    [[ADD_I:%.*]] = add nsw i32 [[X]], 4
 ; CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[ADD_I]], -8
 ; CHECK-NEXT:    ret i32 [[AND_I]]
 ;
@@ -4279,9 +4275,7 @@ define i32 @shrink_add_rhs_constant2(i32 %x) {
 
 define i64 @shrink_add_rhs_constant3(i64 %x) {
 ; CHECK-LABEL: @shrink_add_rhs_constant3(
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], 23
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -8
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], 15
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[X:%.*]], 31
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -8
 ; CHECK-NEXT:    ret i64 [[TMP5]]
 ;
@@ -4295,9 +4289,7 @@ define i64 @shrink_add_rhs_constant3(i64 %x) {
 define i64 @shrink_add_rhs_constant4(i64 %x) {
 ; CHECK-LABEL: @shrink_add_rhs_constant4(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw i64 [[TMP0:%.*]], 4
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP2]], 23
-; CHECK-NEXT:    [[TMP4:%.*]] = and i64 [[TMP5]], -16
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP4]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], 24
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %shl = shl nsw i64 %x, 4
@@ -4310,10 +4302,7 @@ define i64 @shrink_add_rhs_constant4(i64 %x) {
 define i64 @shrink_add_rhs_constant5(i64 %x) {
 ; CHECK-LABEL: @shrink_add_rhs_constant5(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[X:%.*]], 3
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 15
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP5]], -8
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], 15
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP4]], -8
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 16
 ; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %shl = shl nsw i64 %x, 3
diff --git a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
index 45db2cf6758524..21654713e339cd 100644
--- a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
+++ b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
@@ -877,8 +877,8 @@ define i64 @urem_shl_vscale_overlap() vscale_range(1,16) {
 ; CHECK-LABEL: @urem_shl_vscale_overlap(
 ; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 10
-; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[SHIFT]], 2047
-; CHECK-NEXT:    [[REM:%.*]] = and i64 [[TMP1]], 1024
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[SHIFT]], 1024
+; CHECK-NEXT:    [[REM:%.*]] = xor i64 [[TMP1]], 1024
 ; CHECK-NEXT:    ret i64 [[REM]]
 ;
   %vscale = call i64 @llvm.vscale.i64()
@@ -904,7 +904,7 @@ define i64 @and_add_shl_vscale_not_power2() vscale_range(1,16) {
 ; CHECK-LABEL: @and_add_shl_vscale_not_power2(
 ; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 6
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[SHIFT]], 4095
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[SHIFT]], 4032
 ; CHECK-NEXT:    [[REM:%.*]] = and i64 [[ADD]], 3072
 ; CHECK-NEXT:    ret i64 [[REM]]
 ;
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index 8ed600e5629e96..0fbb73431d2cfb 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -1603,7 +1603,7 @@ def __init__(self):
 
                 if rhs_value.name in new_color.mapping:
                     # Same, but for a possible commit happening on the same line
-                    if new_color.color[rhs_value.name] == lhs_value.name:
+                    if new_color.mapping[rhs_value.name] == lhs_value.name:
                         continue
                     else:
                         break