[llvm] aaad507 - [RISCV] Return false from isOffsetFoldingLegal instead of reversing the fold in lowering.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri May 27 11:05:40 PDT 2022


Author: Craig Topper
Date: 2022-05-27T11:05:18-07:00
New Revision: aaad507546a5cc788aabc907ec47bbbfb8283e8e

URL: https://github.com/llvm/llvm-project/commit/aaad507546a5cc788aabc907ec47bbbfb8283e8e
DIFF: https://github.com/llvm/llvm-project/commit/aaad507546a5cc788aabc907ec47bbbfb8283e8e.diff

LOG: [RISCV] Return false from isOffsetFoldingLegal instead of reversing the fold in lowering.

When lowering GlobalAddress nodes, we were removing a non-zero offset and
creating a separate ADD.

The address already comes out of SelectionDAGBuilder with a separate ADD;
DAGCombiner was then folding the offset into the GlobalAddressSDNode,
removing that ADD.

This patch disables that DAG combine by returning false from
isOffsetFoldingLegal, so lowering no longer has to reverse it. Test
changes all look to be instruction order changes, probably due to
different DAG node ordering.

Differential Revision: https://reviews.llvm.org/D126558
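
As an illustration, here is a minimal sketch (not taken from the patch; the
global @g and the function name are invented) of the kind of IR this affects,
e.g. when compiled with llc -mtriple=riscv64:

; A load from a global at a non-zero constant offset. SelectionDAGBuilder
; builds the address as (add (GlobalAddress @g), 8). Previously DAGCombiner
; folded the 8 into the GlobalAddressSDNode (isOffsetFoldingLegal returned
; true) and lowerGlobalAddress split it back out into an ADD; returning
; false now keeps the original ADD intact.
@g = dso_local global [4 x i64] zeroinitializer

define dso_local i64 @load_g_elt1() nounwind {
  %p = getelementptr inbounds [4 x i64], [4 x i64]* @g, i64 0, i64 1
  %v = load i64, i64* %p
  ret i64 %v
}

The final code is equivalent either way (peephole passes such as
RISCVMergeBaseOffset can still fold the offset back in when profitable); the
DAG nodes are just created in a different order, which is what shifts the
instruction order in the updated tests.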

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/test/CodeGen/RISCV/double-mem.ll
    llvm/test/CodeGen/RISCV/float-mem.ll
    llvm/test/CodeGen/RISCV/half-mem.ll
    llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
    llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
    llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
    llvm/test/CodeGen/RISCV/mem.ll
    llvm/test/CodeGen/RISCV/mem64.ll
    llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4f74f3f01795c..17c8870f24192 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1231,6 +1231,15 @@ bool RISCVTargetLowering::shouldSinkOperands(
   return true;
 }
 
+bool RISCVTargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode *GA) const {
+  // In order to maximise the opportunity for common subexpression elimination,
+  // keep a separate ADD node for the global address offset instead of folding
+  // it in the global address node. Later peephole optimisations may choose to
+  // fold it back in when profitable.
+  return false;
+}
+
 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                        bool ForCodeSize) const {
   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
@@ -3578,21 +3587,12 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
   SDLoc DL(Op);
   EVT Ty = Op.getValueType();
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
-  int64_t Offset = N->getOffset();
+  assert(N->getOffset() == 0 && "unexpected offset in global node");
   MVT XLenVT = Subtarget.getXLenVT();
 
   const GlobalValue *GV = N->getGlobal();
   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
-  SDValue Addr = getAddr(N, DAG, IsLocal);
-
-  // In order to maximise the opportunity for common subexpression elimination,
-  // emit a separate ADD node for the global address offset instead of folding
-  // it in the global address node. Later peephole optimisations may choose to
-  // fold it back in when profitable.
-  if (Offset != 0)
-    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
-                       DAG.getConstant(Offset, DL, XLenVT));
-  return Addr;
+  return getAddr(N, DAG, IsLocal);
 }
 
 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@@ -3701,7 +3701,7 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
   SDLoc DL(Op);
   EVT Ty = Op.getValueType();
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
-  int64_t Offset = N->getOffset();
+  assert(N->getOffset() == 0 && "unexpected offset in global node");
   MVT XLenVT = Subtarget.getXLenVT();
 
   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
@@ -3724,13 +3724,6 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
     break;
   }
 
-  // In order to maximise the opportunity for common subexpression elimination,
-  // emit a separate ADD node for the global address offset instead of folding
-  // it in the global address node. Later peephole optimisations may choose to
-  // fold it back in when profitable.
-  if (Offset != 0)
-    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
-                       DAG.getConstant(Offset, DL, XLenVT));
   return Addr;
 }
 

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 1c461fb38ceb0..b3e9ec6c117c8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -354,6 +354,7 @@ class RISCVTargetLowering : public TargetLowering {
       SelectionDAG &DAG) const override;
   bool shouldSinkOperands(Instruction *I,
                           SmallVectorImpl<Use *> &Ops) const override;
+  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
 

diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll
index ba0b652ddffc7..fc42f2a236453 100644
--- a/llvm/test/CodeGen/RISCV/double-mem.ll
+++ b/llvm/test/CodeGen/RISCV/double-mem.ll
@@ -59,10 +59,10 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
 ; RV32IFD-NEXT:    fadd.d fa0, fa0, fa1
 ; RV32IFD-NEXT:    lui a0, %hi(G)
 ; RV32IFD-NEXT:    fld ft0, %lo(G)(a0)
+; RV32IFD-NEXT:    addi a1, a0, %lo(G)
 ; RV32IFD-NEXT:    fsd fa0, %lo(G)(a0)
-; RV32IFD-NEXT:    addi a0, a0, %lo(G)
-; RV32IFD-NEXT:    fld ft0, 72(a0)
-; RV32IFD-NEXT:    fsd fa0, 72(a0)
+; RV32IFD-NEXT:    fld ft0, 72(a1)
+; RV32IFD-NEXT:    fsd fa0, 72(a1)
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fld_fsd_global:
@@ -70,10 +70,10 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
 ; RV64IFD-NEXT:    fadd.d fa0, fa0, fa1
 ; RV64IFD-NEXT:    lui a0, %hi(G)
 ; RV64IFD-NEXT:    fld ft0, %lo(G)(a0)
+; RV64IFD-NEXT:    addi a1, a0, %lo(G)
 ; RV64IFD-NEXT:    fsd fa0, %lo(G)(a0)
-; RV64IFD-NEXT:    addi a0, a0, %lo(G)
-; RV64IFD-NEXT:    fld ft0, 72(a0)
-; RV64IFD-NEXT:    fsd fa0, 72(a0)
+; RV64IFD-NEXT:    fld ft0, 72(a1)
+; RV64IFD-NEXT:    fsd fa0, 72(a1)
 ; RV64IFD-NEXT:    ret
 ; Use %a and %b in an FP op to ensure floating point registers are used, even
 ; for the soft float ABI

diff --git a/llvm/test/CodeGen/RISCV/float-mem.ll b/llvm/test/CodeGen/RISCV/float-mem.ll
index fe1e9bcf431b8..f90272c816afb 100644
--- a/llvm/test/CodeGen/RISCV/float-mem.ll
+++ b/llvm/test/CodeGen/RISCV/float-mem.ll
@@ -61,10 +61,10 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
 ; RV32IF-NEXT:    fadd.s fa0, fa0, fa1
 ; RV32IF-NEXT:    lui a0, %hi(G)
 ; RV32IF-NEXT:    flw ft0, %lo(G)(a0)
+; RV32IF-NEXT:    addi a1, a0, %lo(G)
 ; RV32IF-NEXT:    fsw fa0, %lo(G)(a0)
-; RV32IF-NEXT:    addi a0, a0, %lo(G)
-; RV32IF-NEXT:    flw ft0, 36(a0)
-; RV32IF-NEXT:    fsw fa0, 36(a0)
+; RV32IF-NEXT:    flw ft0, 36(a1)
+; RV32IF-NEXT:    fsw fa0, 36(a1)
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: flw_fsw_global:
@@ -72,10 +72,10 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
 ; RV64IF-NEXT:    fadd.s fa0, fa0, fa1
 ; RV64IF-NEXT:    lui a0, %hi(G)
 ; RV64IF-NEXT:    flw ft0, %lo(G)(a0)
+; RV64IF-NEXT:    addi a1, a0, %lo(G)
 ; RV64IF-NEXT:    fsw fa0, %lo(G)(a0)
-; RV64IF-NEXT:    addi a0, a0, %lo(G)
-; RV64IF-NEXT:    flw ft0, 36(a0)
-; RV64IF-NEXT:    fsw fa0, 36(a0)
+; RV64IF-NEXT:    flw ft0, 36(a1)
+; RV64IF-NEXT:    fsw fa0, 36(a1)
 ; RV64IF-NEXT:    ret
   %1 = fadd float %a, %b
   %2 = load volatile float, float* @G

diff --git a/llvm/test/CodeGen/RISCV/half-mem.ll b/llvm/test/CodeGen/RISCV/half-mem.ll
index 3e99e17057660..9049b92946a62 100644
--- a/llvm/test/CodeGen/RISCV/half-mem.ll
+++ b/llvm/test/CodeGen/RISCV/half-mem.ll
@@ -61,10 +61,10 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
 ; RV32IZFH-NEXT:    fadd.h fa0, fa0, fa1
 ; RV32IZFH-NEXT:    lui a0, %hi(G)
 ; RV32IZFH-NEXT:    flh ft0, %lo(G)(a0)
+; RV32IZFH-NEXT:    addi a1, a0, %lo(G)
 ; RV32IZFH-NEXT:    fsh fa0, %lo(G)(a0)
-; RV32IZFH-NEXT:    addi a0, a0, %lo(G)
-; RV32IZFH-NEXT:    flh ft0, 18(a0)
-; RV32IZFH-NEXT:    fsh fa0, 18(a0)
+; RV32IZFH-NEXT:    flh ft0, 18(a1)
+; RV32IZFH-NEXT:    fsh fa0, 18(a1)
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: flh_fsh_global:
@@ -72,10 +72,10 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
 ; RV64IZFH-NEXT:    fadd.h fa0, fa0, fa1
 ; RV64IZFH-NEXT:    lui a0, %hi(G)
 ; RV64IZFH-NEXT:    flh ft0, %lo(G)(a0)
+; RV64IZFH-NEXT:    addi a1, a0, %lo(G)
 ; RV64IZFH-NEXT:    fsh fa0, %lo(G)(a0)
-; RV64IZFH-NEXT:    addi a0, a0, %lo(G)
-; RV64IZFH-NEXT:    flh ft0, 18(a0)
-; RV64IZFH-NEXT:    fsh fa0, 18(a0)
+; RV64IZFH-NEXT:    flh ft0, 18(a1)
+; RV64IZFH-NEXT:    fsh fa0, 18(a1)
 ; RV64IZFH-NEXT:    ret
   %1 = fadd half %a, %b
   %2 = load volatile half, half* @G

diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 85df696eb6a50..7ebc526bb4d33 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -99,11 +99,11 @@ define dso_local i32* @big_offset_one_use() local_unnamed_addr nounwind {
 ;
 ; RV64-LABEL: big_offset_one_use:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    lui a0, 4
-; RV64-NEXT:    addiw a0, a0, 188
-; RV64-NEXT:    lui a1, %hi(s)
-; RV64-NEXT:    addi a1, a1, %lo(s)
-; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    lui a0, %hi(s)
+; RV64-NEXT:    addi a0, a0, %lo(s)
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    addiw a1, a1, 188
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    ret
 entry:
   ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 5)

diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
index d062709000f04..25d32d76dbce8 100644
--- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
@@ -42,7 +42,7 @@ define void @test(i32 signext %i) nounwind {
 ; RV32-NEXT:    addi a3, a3, 1
 ; RV32-NEXT:  .LBB0_2: # %bb
 ; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32-NEXT:    add a4, a1, a2
+; RV32-NEXT:    add a4, a2, a1
 ; RV32-NEXT:    add a1, a1, a0
 ; RV32-NEXT:    sb zero, 0(a4)
 ; RV32-NEXT:    blt a1, a3, .LBB0_2
@@ -65,7 +65,7 @@ define void @test(i32 signext %i) nounwind {
 ; RV64-NEXT:    addw a5, a5, a1
 ; RV64-NEXT:    slli a6, a5, 32
 ; RV64-NEXT:    srli a6, a6, 32
-; RV64-NEXT:    add a6, a6, a3
+; RV64-NEXT:    add a6, a3, a6
 ; RV64-NEXT:    sb zero, 0(a6)
 ; RV64-NEXT:    addw a5, a5, a0
 ; RV64-NEXT:    addiw a2, a2, 1

diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
index a288b4ea77787..ede08528198b2 100644
--- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
@@ -54,10 +54,10 @@ define void @test(i32 signext %row, i32 signext %N.in) nounwind {
 ; RV64-NEXT:    blez a1, .LBB0_3
 ; RV64-NEXT:  # %bb.1: # %cond_true.preheader
 ; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    slli a0, a0, 6
 ; RV64-NEXT:    lui a2, %hi(A)
 ; RV64-NEXT:    addi a2, a2, %lo(A)
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, a0, a2
+; RV64-NEXT:    add a0, a2, a0
 ; RV64-NEXT:    li a2, 4
 ; RV64-NEXT:    li a3, 5
 ; RV64-NEXT:  .LBB0_2: # %cond_true

diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll
index e18f5018e403c..7af934fdacabf 100644
--- a/llvm/test/CodeGen/RISCV/mem.ll
+++ b/llvm/test/CodeGen/RISCV/mem.ll
@@ -170,10 +170,10 @@ define dso_local i32 @lw_sw_global(i32 %a) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a2, %hi(G)
 ; RV32I-NEXT:    lw a1, %lo(G)(a2)
+; RV32I-NEXT:    addi a3, a2, %lo(G)
 ; RV32I-NEXT:    sw a0, %lo(G)(a2)
-; RV32I-NEXT:    addi a2, a2, %lo(G)
-; RV32I-NEXT:    lw a3, 36(a2)
-; RV32I-NEXT:    sw a0, 36(a2)
+; RV32I-NEXT:    lw a2, 36(a3)
+; RV32I-NEXT:    sw a0, 36(a3)
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:    ret
   %1 = load volatile i32, i32* @G

diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll
index 19f741335370d..47ab20e950c5e 100644
--- a/llvm/test/CodeGen/RISCV/mem64.ll
+++ b/llvm/test/CodeGen/RISCV/mem64.ll
@@ -215,10 +215,10 @@ define dso_local i64 @ld_sd_global(i64 %a) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a2, %hi(G)
 ; RV64I-NEXT:    ld a1, %lo(G)(a2)
+; RV64I-NEXT:    addi a3, a2, %lo(G)
 ; RV64I-NEXT:    sd a0, %lo(G)(a2)
-; RV64I-NEXT:    addi a2, a2, %lo(G)
-; RV64I-NEXT:    ld a3, 72(a2)
-; RV64I-NEXT:    sd a0, 72(a2)
+; RV64I-NEXT:    ld a2, 72(a3)
+; RV64I-NEXT:    sd a0, 72(a3)
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    ret
   %1 = load volatile i64, i64* @G

diff --git a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
index 4b5f18de7d175..55ea182616643 100644
--- a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
+++ b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
@@ -10,12 +10,12 @@ define dso_local i32 @test_zext_i8() nounwind {
 ; RV32I-LABEL: test_zext_i8:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(bytes)
-; RV32I-NEXT:    lbu a1, %lo(bytes)(a0)
-; RV32I-NEXT:    addi a0, a0, %lo(bytes)
-; RV32I-NEXT:    lbu a0, 1(a0)
-; RV32I-NEXT:    xori a1, a1, 136
-; RV32I-NEXT:    xori a0, a0, 7
-; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    addi a1, a0, %lo(bytes)
+; RV32I-NEXT:    lbu a0, %lo(bytes)(a0)
+; RV32I-NEXT:    lbu a1, 1(a1)
+; RV32I-NEXT:    xori a0, a0, 136
+; RV32I-NEXT:    xori a1, a1, 7
+; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    beqz a0, .LBB0_2
 ; RV32I-NEXT:  # %bb.1: # %if.then
 ; RV32I-NEXT:    li a0, 1
@@ -42,14 +42,14 @@ define dso_local i32 @test_zext_i16() nounwind {
 ; RV32I-LABEL: test_zext_i16:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(shorts)
-; RV32I-NEXT:    lhu a1, %lo(shorts)(a0)
-; RV32I-NEXT:    addi a0, a0, %lo(shorts)
-; RV32I-NEXT:    lhu a0, 2(a0)
+; RV32I-NEXT:    addi a1, a0, %lo(shorts)
+; RV32I-NEXT:    lhu a0, %lo(shorts)(a0)
+; RV32I-NEXT:    lhu a1, 2(a1)
 ; RV32I-NEXT:    lui a2, 16
 ; RV32I-NEXT:    addi a2, a2, -120
-; RV32I-NEXT:    xor a1, a1, a2
-; RV32I-NEXT:    xori a0, a0, 7
-; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    xor a0, a0, a2
+; RV32I-NEXT:    xori a1, a1, 7
+; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    beqz a0, .LBB1_2
 ; RV32I-NEXT:  # %bb.1: # %if.then
 ; RV32I-NEXT:    li a0, 1


        

