[PATCH] D45748: [RISCV] Separate base from offset in lowerGlobalAddress

Sameer AbuAsal via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 17 16:13:07 PDT 2018


sabuasal created this revision.
sabuasal added reviewers: asb, apazos, shiva0217, llvm-commits.
Herald added subscribers: zzheng, kito-cheng, niosHD, jordy.potman.lists, simoncook, johnrusso, rbar.

When lowering a global address, lower the base as a TargetGlobalAddress
first, then create an SDNode for the offset separately and chain it
to the address calculation.
  
   This optimization will create a DAG where the base address of a global
   access will be reused between different accesses. The offset can later
   be folded into the immediate part of the memory access instruction.
  
   With this optimization we generate:
  
    lui     a0, %hi(s)
    addi    a0, a0, %lo(s)    ; shared base address.
  
    addi    a1, zero, 20      ; 2 instructions per access.
    sw      a1, 44(a0)
  
    addi    a1, zero, 10
    sw      a1, 8(a0)
  
    addi    a1, zero, 30
    sw      a1, 80(a0)
  
  Instead of:
  
    lui     a0, %hi(s+44)      ; 3 instructions per access.
    addi    a1, zero, 20
    sw      a1, %lo(s+44)(a0)
  
    lui     a0, %hi(s+8)
    addi    a1, zero, 10
    sw      a1, %lo(s+8)(a0)
  
    lui     a0, %hi(s+80)
    addi    a1, zero, 30
    sw      a1, %lo(s+80)(a0)
  
  This saves one instruction per access.


https://reviews.llvm.org/D45748

Files:
  lib/Target/RISCV/RISCVISelLowering.cpp
  test/CodeGen/RISCV/hoist-global-addr-base.ll


Index: test/CodeGen/RISCV/hoist-global-addr-base.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -0,0 +1,65 @@
+; RUN: llc -mtriple=riscv32  < %s | FileCheck  %s
+
+; This test case checks that the base for a global address will be reused
+; between different accesses. This will allow for a shorter instruction
+; sequence per access, instead of:
+; lui     a0, %hi(s+44)
+; addi    a1, zero, 20
+; sw      a1, %lo(s+44)(a0)
+
+; lui     a0, %hi(s+8)
+; addi    a1, zero, 10
+; sw      a1, %lo(s+8)(a0)
+; The first function shows the case when the offset is small enough to be
+; folded, the second is when the offset is too big.
+
+; TODO: for the big offset, the addi following the lui for the offset
+;       can be optimized away by folding the %lo into the immediate part
+;       of the sw.
+
+%struct.S = type { i32, i32, %struct.S_nest, i32, i32, %struct.S_nest, i32, i32, %struct.S_nest }
+%struct.S_nest = type { i32, i32, i32, i32, i32, i32 }
+
+%struct.S2 = type { i32, [4100 x i32], i32, [4100 x i32], i32 }
+
+ at s = common dso_local local_unnamed_addr global %struct.S zeroinitializer, align 4
+ at s2 = common dso_local local_unnamed_addr global %struct.S2 zeroinitializer, align 4
+
+; Function Attrs: minsize norecurse nounwind optsize
+define dso_local void @foo() local_unnamed_addr {
+; CHECK-LABEL: foo:
+; CHECK:      lui     a0, %hi(s)
+; CHECK-NEXT: addi    a0, a0, %lo(s)
+; CHECK-NEXT: addi    a1, zero, 20
+; CHECK-NEXT: sw      a1, 44(a0)
+; CHECK-NEXT: addi    a1, zero, 10
+; CHECK-NEXT: sw      a1, 8(a0)
+; CHECK-NEXT: addi    a1, zero, 30
+; CHECK-NEXT: sw      a1, 80(a0)
+entry:
+  store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 2, i32 0), align 4
+  store i32 20, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 5, i32 1), align 4
+  store i32 30, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 8, i32 2), align 4
+  ret void
+}
+
+; Function Attrs: norecurse nounwind optsize
+define dso_local void @bar() local_unnamed_addr #0 {
+; CHECK-LABEL: bar:
+; CHECK:      lui   a0, %hi(s2)
+; CHECK-NEXT: addi  a0, a0, %lo(s2)
+; CHECK-NEXT: lui   a1, 8
+; CHECK-NEXT: addi  a1, a1, 40
+; CHECK-NEXT: add   a1, a0, a1
+; CHECK-NEXT: addi  a2, zero, 50
+; CHECK-NEXT: sw    a2, 0(a1)
+; CHECK-NEXT: lui   a1, 4
+; CHECK-NEXT: addi  a1, a1, 20
+; CHECK-NEXT: add   a0, a0, a1
+; CHECK-NEXT: addi  a1, zero, 40
+; CHECK-NEXT: sw    a1, 0(a0)
+entry:
+  store i32 40, i32* getelementptr inbounds (%struct.S2, %struct.S2* @s2, i32 0, i32 2), align 4
+  store i32 50, i32* getelementptr inbounds (%struct.S2, %struct.S2* @s2, i32 0, i32 4), align 4
+  ret void
+}
Index: lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.cpp
+++ lib/Target/RISCV/RISCVISelLowering.cpp
@@ -225,17 +225,21 @@
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = N->getGlobal();
   int64_t Offset = N->getOffset();
+  MVT XLenVT = Subtarget.getXLenVT();
 
   if (isPositionIndependent() || Subtarget.is64Bit())
     report_fatal_error("Unable to lowerGlobalAddress");
 
   SDValue GAHi =
-    DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_HI);
+    DAG.getTargetGlobalAddress(GV, DL, Ty, 0,RISCVII::MO_HI);
   SDValue GALo =
-    DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_LO);
+    DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
   SDValue MNLo =
     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
+  if (Offset != 0)
+    return  DAG.getNode(ISD::ADD, DL, Ty, MNLo,
+                        DAG.getConstant(Offset, DL, XLenVT));
   return MNLo;
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D45748.142858.patch
Type: text/x-patch
Size: 3863 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180417/4b46ade2/attachment.bin>


More information about the llvm-commits mailing list