[llvm] 480e7f4 - [AArch64][GlobalISel] Share address mode selection code for memops

Jessica Paquette via llvm-commits <llvm-commits at lists.llvm.org>
Wed Sep 9 15:25:15 PDT 2020


Author: Jessica Paquette
Date: 2020-09-09T15:14:46-07:00
New Revision: 480e7f43a22578beaa2edc7a271e77793222a1c3

URL: https://github.com/llvm/llvm-project/commit/480e7f43a22578beaa2edc7a271e77793222a1c3
DIFF: https://github.com/llvm/llvm-project/commit/480e7f43a22578beaa2edc7a271e77793222a1c3.diff

LOG: [AArch64][GlobalISel] Share address mode selection code for memops

We were missing support for the G_ADD_LOW + ADRP folding optimization in the
manual selection code for G_LOAD, G_STORE, and G_ZEXTLOAD.

As a result, we were missing cases like this:

```
@foo = external hidden global i32*
define void @baz(i32* %0) {
store i32* %0, i32** @foo
ret void
}
```

https://godbolt.org/z/16r7ad

Without the fold, the low 12 bits of the address are computed with a separate
`add` after the `adrp`; with it, they are folded into the store's addressing
mode as a `:lo12:` offset (roughly `adrp` + `str` instead of `adrp` + `add` +
`str`).

This functionality already existed in the addressing mode functions used by the
importer, so this patch makes the manual selection code use
`selectAddrModeIndexed` rather than duplicating that work.
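
In rough terms, the shared path now looks like this (a condensed sketch of the
pattern the patch adopts; the names, helpers, and renderer-function mechanism
are taken from the diff below, with the early bail-out for unhandled opcodes
elided):

```
// Sketch of the shared load/store selection path (condensed from the diff).
// selectAddrModeIndexed returns renderer functions when it can fold the
// pointer computation (including the ADRP + G_ADD_LOW case this patch is
// about) into a base + scaled unsigned immediate addressing mode.
bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
const unsigned NewOpc =
    selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
auto AddrModeFns = selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
if (!AddrModeFns) {
  // Nothing to fold: keep the original instruction with an offset of 0.
  I.setDesc(TII.get(NewOpc));
  I.addOperand(MachineOperand::CreateImm(0));
} else {
  // Folded something: build a new instruction and let the renderer
  // functions append the base and offset operands.
  auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
  IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
  NewInst.cloneMemRefs(I);
  for (auto &Fn : *AddrModeFns)
    Fn(NewInst);
  I.eraseFromParent();
}
```

Building a fresh instruction in the folded case, rather than mutating the
G_LOAD/G_STORE in place, lets the renderer functions append whatever operands
the matched addressing mode needs.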

This is a 0.2% geomean code size improvement for CTMark at -O3.

There is one code size increase (0.1% on lencod), which is likely because
`selectAddrModeIndexed` doesn't look through constants.

Differential Revision: https://reviews.llvm.org/D87397

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index a8d68180bb76..228db83533cd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2260,18 +2260,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     }
 
     auto &MemOp = **I.memoperands_begin();
+    uint64_t MemSizeInBytes = MemOp.getSize();
     if (MemOp.isAtomic()) {
       // For now we just support s8 acquire loads to be able to compile stack
       // protector code.
       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
-          MemOp.getSize() == 1) {
+          MemSizeInBytes == 1) {
         I.setDesc(TII.get(AArch64::LDARB));
         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
       }
       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
       return false;
     }
-    unsigned MemSizeInBits = MemOp.getSize() * 8;
+    unsigned MemSizeInBits = MemSizeInBytes * 8;
 
     const Register PtrReg = I.getOperand(1).getReg();
 #ifndef NDEBUG
@@ -2286,78 +2287,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     const Register ValReg = I.getOperand(0).getReg();
     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
 
-    const unsigned NewOpc =
-        selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
-    if (NewOpc == I.getOpcode())
-      return false;
-
-    I.setDesc(TII.get(NewOpc));
-
-    uint64_t Offset = 0;
-    auto *PtrMI = MRI.getVRegDef(PtrReg);
-
-    // Try to fold a GEP into our unsigned immediate addressing mode.
-    if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
-      if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
-        int64_t Imm = *COff;
-        const unsigned Size = MemSizeInBits / 8;
-        const unsigned Scale = Log2_32(Size);
-        if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
-          Register Ptr2Reg = PtrMI->getOperand(1).getReg();
-          I.getOperand(1).setReg(Ptr2Reg);
-          PtrMI = MRI.getVRegDef(Ptr2Reg);
-          Offset = Imm / Size;
-        }
+    // Helper lambda for partially selecting I. Either returns the original
+    // instruction with an updated opcode, or a new instruction.
+    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
+      bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+      const unsigned NewOpc =
+          selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
+      if (NewOpc == I.getOpcode())
+        return nullptr;
+      // Check if we can fold anything into the addressing mode.
+      auto AddrModeFns =
+          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+      if (!AddrModeFns) {
+        // Can't fold anything. Use the original instruction.
+        I.setDesc(TII.get(NewOpc));
+        I.addOperand(MachineOperand::CreateImm(0));
+        return &I;
       }
-    }
 
-    // If we haven't folded anything into our addressing mode yet, try to fold
-    // a frame index into the base+offset.
-    if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
-      I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
+      // Folded something. Create a new instruction and return it.
+      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
+      IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+      NewInst.cloneMemRefs(I);
+      for (auto &Fn : *AddrModeFns)
+        Fn(NewInst);
+      I.eraseFromParent();
+      return &*NewInst;
+    };
 
-    I.addOperand(MachineOperand::CreateImm(Offset));
+    MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
+    if (!LoadStore)
+      return false;
 
     // If we're storing a 0, use WZR/XZR.
     if (Opcode == TargetOpcode::G_STORE) {
       auto CVal = getConstantVRegValWithLookThrough(
-          ValReg, MRI, /*LookThroughInstrs = */ true,
+          LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
           /*HandleFConstants = */ false);
       if (CVal && CVal->Value == 0) {
-        unsigned Opc = I.getOpcode();
-        switch (Opc) {
+        switch (LoadStore->getOpcode()) {
         case AArch64::STRWui:
         case AArch64::STRHHui:
         case AArch64::STRBBui:
-          I.getOperand(0).setReg(AArch64::WZR);
+          LoadStore->getOperand(0).setReg(AArch64::WZR);
           break;
         case AArch64::STRXui:
-          I.getOperand(0).setReg(AArch64::XZR);
+          LoadStore->getOperand(0).setReg(AArch64::XZR);
           break;
         }
       }
     }
 
     if (IsZExtLoad) {
-      // The zextload from a smaller type to i32 should be handled by the importer.
-      if (MRI.getType(ValReg).getSizeInBits() != 64)
+      // The zextload from a smaller type to i32 should be handled by the
+      // importer.
+      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
         return false;
       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
-      //and zero_extend with SUBREG_TO_REG.
+      // and zero_extend with SUBREG_TO_REG.
       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-      Register DstReg = I.getOperand(0).getReg();
-      I.getOperand(0).setReg(LdReg);
+      Register DstReg = LoadStore->getOperand(0).getReg();
+      LoadStore->getOperand(0).setReg(LdReg);
 
-      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+      MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
           .addImm(0)
           .addUse(LdReg)
           .addImm(AArch64::sub_32);
-      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+      constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                           MRI);
     }
-    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
   }
 
   case TargetOpcode::G_SMULH:

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
index db355dfc151f..05038b40ca36 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -39,6 +39,9 @@
   define void @store_8xi16(<8 x i16> %v, <8 x i16>* %ptr) { ret void }
   define void @store_16xi8(<16 x i8> %v, <16 x i8>* %ptr) { ret void }
 
+  @x = external hidden local_unnamed_addr global i32*, align 8
+  define void @store_adrp_add_low() { ret void }
+
 ...
 
 ---
@@ -600,3 +603,20 @@ body:             |
     RET_ReallyLR
 
 ...
+---
+name:            store_adrp_add_low
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: store_adrp_add_low
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x
+    ; CHECK: STRXui %copy, %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store 8 into @x)
+    %copy:gpr(p0) = COPY $x0
+    %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x
+    %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
+    G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
