[llvm] r291180 - [AArch64] Fold some filled/spilled subreg COPYs

Geoff Berry via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 5 13:51:43 PST 2017


Author: gberry
Date: Thu Jan  5 15:51:42 2017
New Revision: 291180

URL: http://llvm.org/viewvc/llvm-project?rev=291180&view=rev
Log:
[AArch64] Fold some filled/spilled subreg COPYs

Summary:
Extend AArch64 foldMemoryOperandImpl() to handle folding spills of
subreg COPYs with read-undef defs like:

  %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64:%vreg0

by widening the spilled physical source reg and generating:

  STRXui %XZR, <fi#0>

as well as folding fills of similar COPYs like:

  %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1

by generating:

  %vreg0:sub_32<def,read-undef> = LDRWui <fi#0>

Reviewers: MatzeB, qcolombet

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D27425

Added:
    llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=291180&r1=291179&r2=291180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Jan  5 15:51:42 2017
@@ -2583,7 +2583,7 @@ MachineInstr *AArch64InstrInfo::foldMemo
   //
   // <rdar://problem/11522048>
   //
-  if (MI.isCopy()) {
+  if (MI.isFullCopy()) {
     unsigned DstReg = MI.getOperand(0).getReg();
     unsigned SrcReg = MI.getOperand(1).getReg();
     if (SrcReg == AArch64::SP &&
@@ -2598,7 +2598,7 @@ MachineInstr *AArch64InstrInfo::foldMemo
     }
   }
 
-  // Handle the case where a copy is being spilled or refilled but the source
+  // Handle the case where a copy is being spilled or filled but the source
   // and destination register class don't match.  For example:
   //
   //   %vreg0<def> = COPY %XZR; GPR64common:%vreg0
@@ -2613,7 +2613,7 @@ MachineInstr *AArch64InstrInfo::foldMemo
   //
   //   %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
   //
-  // will be refilled as
+  // will be filled as
   //
   //   LDRDui %vreg0, fi<#0>
   //
@@ -2622,9 +2622,11 @@ MachineInstr *AArch64InstrInfo::foldMemo
   //   LDRXui %vregTemp, fi<#0>
   //   %vreg0 = FMOV %vregTemp
   //
-  if (MI.isFullCopy() && Ops.size() == 1 &&
+  if (MI.isCopy() && Ops.size() == 1 &&
       // Make sure we're only folding the explicit COPY defs/uses.
       (Ops[0] == 0 || Ops[0] == 1)) {
+    bool IsSpill = Ops[0] == 0;
+    bool IsFill = !IsSpill;
     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
     const MachineRegisterInfo &MRI = MF.getRegInfo();
     MachineBasicBlock &MBB = *MI.getParent();
@@ -2632,21 +2634,112 @@ MachineInstr *AArch64InstrInfo::foldMemo
     const MachineOperand &SrcMO = MI.getOperand(1);
     unsigned DstReg = DstMO.getReg();
     unsigned SrcReg = SrcMO.getReg();
+    // This is slightly expensive to compute for physical regs since
+    // getMinimalPhysRegClass is slow.
     auto getRegClass = [&](unsigned Reg) {
       return TargetRegisterInfo::isVirtualRegister(Reg)
                  ? MRI.getRegClass(Reg)
                  : TRI.getMinimalPhysRegClass(Reg);
     };
-    const TargetRegisterClass &DstRC = *getRegClass(DstReg);
-    const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
-    if (DstRC.getSize() == SrcRC.getSize()) {
-      if (Ops[0] == 0)
+
+    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
+      assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
+             "Mismatched register size in non subreg COPY");
+      if (IsSpill)
         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
-                            &SrcRC, &TRI);
+                            getRegClass(SrcReg), &TRI);
       else
-        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
+                             getRegClass(DstReg), &TRI);
       return &*--InsertPt;
     }
+
+    // Handle cases like spilling def of:
+    //
+    //   %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
+    //
+    // where the physical register source can be widened and stored to the full
+    // virtual reg destination stack slot, in this case producing:
+    //
+    //   STRXui %XZR, <fi#0>
+    //
+    if (IsSpill && DstMO.isUndef() &&
+        TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+      assert(SrcMO.getSubReg() == 0 &&
+             "Unexpected subreg on physical register");
+      const TargetRegisterClass *SpillRC;
+      unsigned SpillSubreg;
+      switch (DstMO.getSubReg()) {
+      default:
+        SpillRC = nullptr;
+        break;
+      case AArch64::sub_32:
+      case AArch64::ssub:
+        if (AArch64::GPR32RegClass.contains(SrcReg)) {
+          SpillRC = &AArch64::GPR64RegClass;
+          SpillSubreg = AArch64::sub_32;
+        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
+          SpillRC = &AArch64::FPR64RegClass;
+          SpillSubreg = AArch64::ssub;
+        } else
+          SpillRC = nullptr;
+        break;
+      case AArch64::dsub:
+        if (AArch64::FPR64RegClass.contains(SrcReg)) {
+          SpillRC = &AArch64::FPR128RegClass;
+          SpillSubreg = AArch64::dsub;
+        } else
+          SpillRC = nullptr;
+        break;
+      }
+
+      if (SpillRC)
+        if (unsigned WidenedSrcReg =
+                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
+          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
+                              FrameIndex, SpillRC, &TRI);
+          return &*--InsertPt;
+        }
+    }
+
+    // Handle cases like filling use of:
+    //
+    //   %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
+    //
+    // where we can load the full virtual reg source stack slot, into the subreg
+    // destination, in this case producing:
+    //
+    //   LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
+    //
+    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
+      const TargetRegisterClass *FillRC;
+      switch (DstMO.getSubReg()) {
+      default:
+        FillRC = nullptr;
+        break;
+      case AArch64::sub_32:
+        FillRC = &AArch64::GPR32RegClass;
+        break;
+      case AArch64::ssub:
+        FillRC = &AArch64::FPR32RegClass;
+        break;
+      case AArch64::dsub:
+        FillRC = &AArch64::FPR64RegClass;
+        break;
+      }
+
+      if (FillRC) {
+        assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
+               "Mismatched regclass size on folded subreg COPY");
+        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
+        MachineInstr &LoadMI = *--InsertPt;
+        MachineOperand &LoadDst = LoadMI.getOperand(0);
+        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
+        LoadDst.setSubReg(DstMO.getSubReg());
+        LoadDst.setIsUndef();
+        return &LoadMI;
+      }
+    }
   }
 
   // Cannot fold.

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=291180&r1=291179&r2=291180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Thu Jan  5 15:51:42 2017
@@ -162,6 +162,10 @@ public:
                             int FrameIndex, const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const override;
 
+  // This tells target independent code that it is okay to pass instructions
+  // with subreg operands to foldMemoryOperandImpl.
+  bool isSubregFoldable() const override { return true; }
+
   using TargetInstrInfo::foldMemoryOperandImpl;
   MachineInstr *
   foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,

Added: llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir?rev=291180&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir (added)
+++ llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir Thu Jan  5 15:51:42 2017
@@ -0,0 +1,82 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs  -o - %s | FileCheck %s
+--- |
+  define i64 @test_subreg_spill_fold() { ret i64 0 }
+  define i64 @test_subreg_spill_fold2() { ret i64 0 }
+  define i64 @test_subreg_spill_fold3() { ret i64 0 }
+  define i64 @test_subreg_fill_fold() { ret i64 0 }
+  define double @test_subreg_fill_fold2() { ret double 0.0 }
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold
+# Ensure that the spilled subreg COPY is eliminated and folded into the spill store.
+name:            test_subreg_spill_fold
+registers:
+  - { id: 0, class: gpr64 }
+body:             |
+  bb.0:
+    ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+    undef %0.sub_32 = COPY %wzr
+    INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+    %x0 = COPY %0
+    RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold2
+# Similar to test_subreg_spill_fold, but with a vreg0 register class not containing %WZR.
+name:            test_subreg_spill_fold2
+registers:
+  - { id: 0, class: gpr64sp }
+body:             |
+  bb.0:
+    ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+    undef %0.sub_32 = COPY %wzr
+    INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+    %x0 = ADDXri %0, 1, 0
+    RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold3
+# Similar to test_subreg_spill_fold, but with a cross register class copy.
+name:            test_subreg_spill_fold3
+registers:
+  - { id: 0, class: fpr64 }
+body:             |
+  bb.0:
+    ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+    undef %0.ssub = COPY %wzr
+    INLINEASM $nop, 1, 12, implicit-def dead %d0, 12, implicit-def dead %d1, 12, implicit-def dead %d2, 12, implicit-def dead %d3, 12, implicit-def dead %d4, 12, implicit-def dead %d5, 12, implicit-def dead %d6, 12, implicit-def dead %d7, 12, implicit-def dead %d8, 12, implicit-def dead %d9, 12, implicit-def dead %d10, 12, implicit-def dead %d11, 12, implicit-def dead %d12, 12, implicit-def dead %d13, 12, implicit-def dead %d14, 12, implicit-def dead %d15, 12, implicit-def dead %d16, 12, implicit-def dead %d17, 12, implicit-def dead %d18, 12, implicit-def dead %d19, 12, implicit-def dead %d20, 12, implicit-def dead %d21, 12, implicit-def dead %d22, 12, implicit-def dead %d23, 12, implicit-def dead %d24, 12, implicit-def dead %d25, 12, implicit-def dead %d26, 12, implicit-def dead %d27, 12, implicit-def dead %d28, 12, implicit-def dead %d29, 12, implicit-def dead %d30, 12, implicit-def %d31
+    %x0 = COPY %0
+    RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_fill_fold
+# Ensure that the filled COPY is eliminated and folded into the fill load.
+name:            test_subreg_fill_fold
+registers:
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: gpr64 }
+body:             |
+  bb.0:
+    %0 = COPY %wzr
+    INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+    ; CHECK: undef %1.sub_32 = LDRWui %stack.0, 0 :: (load 4 from %stack.0)
+    undef %1.sub_32 = COPY %0
+    %x0 = COPY %1
+    RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_fill_fold2
+# Similar to test_subreg_fill_fold, but with a cross-class copy.
+name:            test_subreg_fill_fold2
+registers:
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: fpr64 }
+body:             |
+  bb.0:
+    %0 = COPY %wzr
+    INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+    ; CHECK: undef %1.ssub = LDRSui %stack.0, 0 :: (load 4 from %stack.0)
+    undef %1.ssub = COPY %0
+    %d0 = COPY %1
+    RET_ReallyLR implicit %d0
+...




More information about the llvm-commits mailing list