[llvm] [DAG] Remove restrictions and increase optimization opportunities (PR #68972)

Fri Oct 13 21:48:20 PDT 2023

https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/68972

>From c25cd56dfab63b6ca3cf33d88547f39d85f9e2db Mon Sep 17 00:00:00 2001
From: "liqin.weng" <liqin.weng at spacemit.com>
Date: Fri, 13 Oct 2023 17:57:53 +0800
Subject: [PATCH] [DAG] Remove restrictions and increase optimization
 opportunities

this patch remove the restriction for folding (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2), and test case from dhrystone
, see this link: https://godbolt.org/z/5KnjK16oG
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  5 ++--
 .../CodeGen/RISCV/riscv-shifted-extend.ll     | 26 +++++++------------
 llvm/test/CodeGen/X86/pr65895.ll              |  3 ++-
 3 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 73438113651f55d..32b434717bc6d83 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10035,7 +10035,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // Variant of version done on multiply, except mul by a power of 2 is turned
   // into a shift.
   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
-      N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
+      TLI.isDesirableToCommuteWithShift(N, Level)) {
     SDValue N01 = N0.getOperand(1);
     if (SDValue Shl1 =
             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
@@ -10050,8 +10050,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // TODO: Should we limit this with isLegalAddImmediate?
   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
       N0.getOperand(0).getOpcode() == ISD::ADD &&
-      N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
-      N0.getOperand(0)->hasOneUse() &&
+      N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
       TLI.isDesirableToCommuteWithShift(N, Level)) {
     SDValue Add = N0.getOperand(0);
     SDLoc DL(N0);
diff --git a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
index 957f44f9f669dea..db2b78ebe52854b 100644
--- a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
@@ -5,12 +5,10 @@
 define void @test(ptr nocapture noundef writeonly %array1, i32 noundef signext %a, i32 noundef signext %b) {
 ; RV64-LABEL: test:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    addiw a3, a1, 5
-; RV64-NEXT:    slli a4, a3, 2
-; RV64-NEXT:    add a4, a0, a4
-; RV64-NEXT:    sw a2, 0(a4)
+; RV64-NEXT:    addi a3, a1, 5
 ; RV64-NEXT:    slli a1, a1, 2
 ; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sw a2, 20(a0)
 ; RV64-NEXT:    sw a2, 24(a0)
 ; RV64-NEXT:    sw a3, 140(a0)
 ; RV64-NEXT:    ret
@@ -34,18 +32,16 @@ entry:
 define void @test1(ptr nocapture noundef %array1, i32 noundef signext %a, i32 noundef signext %b, i32 noundef signext %x) {
 ; RV64-LABEL: test1:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    addiw a4, a1, 5
-; RV64-NEXT:    slli a5, a4, 2
-; RV64-NEXT:    add a5, a0, a5
-; RV64-NEXT:    mv a6, a4
+; RV64-NEXT:    addi a4, a1, 5
+; RV64-NEXT:    mv a5, a4
 ; RV64-NEXT:    bgtz a3, .LBB1_2
 ; RV64-NEXT:  # %bb.1: # %entry
-; RV64-NEXT:    mv a6, a2
+; RV64-NEXT:    mv a5, a2
 ; RV64-NEXT:  .LBB1_2: # %entry
-; RV64-NEXT:    sw a6, 0(a5)
 ; RV64-NEXT:    slli a1, a1, 2
 ; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    sw a6, 24(a0)
+; RV64-NEXT:    sw a5, 20(a0)
+; RV64-NEXT:    sw a5, 24(a0)
 ; RV64-NEXT:    sw a4, 140(a0)
 ; RV64-NEXT:    ret
 entry:
@@ -70,11 +66,9 @@ define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64
 ; RV64-LABEL: test2:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addi a3, a1, 5
-; RV64-NEXT:    slli a4, a3, 3
-; RV64-NEXT:    add a4, a0, a4
-; RV64-NEXT:    sd a2, 0(a4)
 ; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sd a2, 40(a0)
 ; RV64-NEXT:    sd a2, 48(a0)
 ; RV64-NEXT:    sd a3, 280(a0)
 ; RV64-NEXT:    ret
@@ -100,11 +94,9 @@ define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a5, a2
 ; RV64-NEXT:  .LBB3_2: # %entry
-; RV64-NEXT:    slli a2, a4, 3
-; RV64-NEXT:    add a2, a0, a2
-; RV64-NEXT:    sd a5, 0(a2)
 ; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sd a5, 40(a0)
 ; RV64-NEXT:    sd a5, 48(a0)
 ; RV64-NEXT:    sd a4, 280(a0)
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/X86/pr65895.ll b/llvm/test/CodeGen/X86/pr65895.ll
index 4fbbed4b18aff03..b9691e6bfd359df 100644
--- a/llvm/test/CodeGen/X86/pr65895.ll
+++ b/llvm/test/CodeGen/X86/pr65895.ll
@@ -21,10 +21,11 @@ define i32 @PR65895() {
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    jmp .LBB0_2
 ; CHECK-NEXT:  .LBB0_3: # %for.end
+; CHECK-NEXT:    movzbl %al, %ecx
 ; CHECK-NEXT:    addb $-3, %al
 ; CHECK-NEXT:    movsbl %al, %eax
 ; CHECK-NEXT:    movl %eax, d(%rip)
-; CHECK-NEXT:    leal 247(%rax,%rax,2), %eax
+; CHECK-NEXT:    leal 241(%rax,%rcx,2), %eax
 ; CHECK-NEXT:    movb $1, c(%rip)
 ; CHECK-NEXT:    movsbq %al, %rax
 ; CHECK-NEXT:    movq %rax, e(%rip)