[llvm] r288439 - [AArch64] Fold more spilled/refilled COPYs.

Geoff Berry via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 1 15:43:56 PST 2016


Author: gberry
Date: Thu Dec  1 17:43:55 2016
New Revision: 288439

URL: http://llvm.org/viewvc/llvm-project?rev=288439&view=rev
Log:
[AArch64] Fold more spilled/refilled COPYs.

Summary:
Make AArch64InstrInfo::foldMemoryOperandImpl more general by folding all
full COPYs between register classes of the same size that are either
spilled or refilled.

Reviewers: MatzeB, qcolombet

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D27271

Added:
    llvm/trunk/test/CodeGen/AArch64/spill-fold.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/trunk/test/CodeGen/AArch64/zero-reg.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=288439&r1=288438&r2=288439&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Dec  1 17:43:55 2016
@@ -2598,8 +2598,8 @@ MachineInstr *AArch64InstrInfo::foldMemo
     }
   }
 
-  // Handle the case where a WZR/XZR copy is being spilled but the destination
-  // register class doesn't contain WZR/XZR.  For example:
+  // Handle the case where a copy is being spilled or refilled but the source
+  // and destination register classes don't match.  For example:
   //
   //   %vreg0<def> = COPY %XZR; GPR64common:%vreg0
   //
@@ -2608,17 +2608,43 @@ MachineInstr *AArch64InstrInfo::foldMemo
   //
   //   STRXui %XZR, <fi#0>
   //
-  if (MI.isFullCopy() && Ops.size() == 1 && Ops[0] == 0) {
+  // This also eliminates spilled cross register class COPYs (e.g. between x and
+  // d regs) of the same size.  For example:
+  //
+  //   %vreg0<def> = COPY %vreg1; FPR64:%vreg0, GPR64:%vreg1
+  //
+  // will be refilled as
+  //
+  //   LDRDui %vreg0, <fi#0>
+  //
+  // instead of
+  //
+  //   LDRXui %vregTemp, <fi#0>
+  //   %vreg0 = FMOV %vregTemp
+  //
+  if (MI.isFullCopy() && Ops.size() == 1 &&
+      // Make sure we're only folding the explicit COPY defs/uses.
+      (Ops[0] == 0 || Ops[0] == 1)) {
+    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
     MachineBasicBlock &MBB = *MI.getParent();
+    const MachineOperand &DstMO = MI.getOperand(0);
     const MachineOperand &SrcMO = MI.getOperand(1);
+    unsigned DstReg = DstMO.getReg();
     unsigned SrcReg = SrcMO.getReg();
-    if (SrcReg == AArch64::WZR || SrcReg == AArch64::XZR) {
-      const TargetRegisterInfo &TRI = getRegisterInfo();
-      const TargetRegisterClass &RC = SrcReg == AArch64::WZR
-                                          ? AArch64::GPR32RegClass
-                                          : AArch64::GPR64RegClass;
-      storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
-                          &RC, &TRI);
+    auto getRegClass = [&](unsigned Reg) {
+      return TargetRegisterInfo::isVirtualRegister(Reg)
+                 ? MRI.getRegClass(Reg)
+                 : TRI.getMinimalPhysRegClass(Reg);
+    };
+    const TargetRegisterClass &DstRC = *getRegClass(DstReg);
+    const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
+    if (DstRC.getSize() == SrcRC.getSize()) {
+      if (Ops[0] == 0)
+        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
+                            &SrcRC, &TRI);
+      else
+        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
       return &*--InsertPt;
     }
   }

Added: llvm/trunk/test/CodeGen/AArch64/spill-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/spill-fold.ll?rev=288439&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/spill-fold.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/spill-fold.ll Thu Dec  1 17:43:55 2016
@@ -0,0 +1,78 @@
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+declare i32 @bar()
+declare i32 @baz()
+
+; Check that the spill of the zero value gets stored directly instead
+; of being copied from wzr and then stored.
+define i32 @test_zr_spill_fold1(i1 %c) {
+; CHECK-LABEL: test_zr_spill_fold1:
+entry:
+  br i1 %c, label %if.else, label %if.then
+
+if.else:
+; CHECK: bl bar
+; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
+  %call1 = tail call i32 @bar()
+  br label %if.end
+
+if.then:
+; CHECK: bl baz
+; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
+  %call2 = tail call i32 @baz()
+  br label %if.end
+
+if.end:
+  %x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
+  ret i32 %x.0
+}
+
+; Similar to test_zr_spill_fold1, but with mis-matched register
+; class between %x.0 and the 0 from %if.then.
+define i32 @test_zr_spill_fold2(i1 %c) {
+; CHECK-LABEL: test_zr_spill_fold2:
+entry:
+  br i1 %c, label %if.else, label %if.then
+
+if.else:
+; CHECK: bl bar
+; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
+  %call1 = tail call i32 @bar()
+  br label %if.end
+
+if.then:
+; CHECK: bl baz
+; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
+  %call2 = tail call i32 @baz()
+  br label %if.end
+
+if.end:
+  %x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
+  %x.1 = add i32 %x.0, 1
+  ret i32 %x.1
+}
+
+; Similar to test_zr_spill_fold1, but with a cross register-class copy feeding a spill store.
+define float @test_cross_spill_fold(i32 %v) {
+; CHECK-LABEL: test_cross_spill_fold:
+entry:
+; CHECK: str w0, [sp, #[[SLOT:[0-9]+]]]
+  %v.f = bitcast i32 %v to float
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
+; CHECK: ldr s0, [sp, #[[SLOT]]]
+  ret float %v.f
+}
+
+; Similar to test_cross_spill_fold, but with a cross register-class copy fed by a refill load.
+define float @test_cross_spill_fold2(i32 %v) {
+; CHECK-LABEL: test_cross_spill_fold2:
+entry:
+; CHECK: str w0, [sp, #[[SLOT:[0-9]+]]]
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
+; CHECK: ldr s0, [sp, #[[SLOT]]]
+  %v.f = bitcast i32 %v to float
+  ret float %v.f
+}
+

Modified: llvm/trunk/test/CodeGen/AArch64/zero-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/zero-reg.ll?rev=288439&r1=288438&r2=288439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/zero-reg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/zero-reg.ll Thu Dec  1 17:43:55 2016
@@ -28,57 +28,3 @@ define void @test_sp(i32 %val) {
   ret void
 ; CHECK: ret
 }
-
-declare i32 @bar()
-declare i32 @baz()
-
-; Check that the spill of the zero value gets stored directly instead
-; of being copied from wzr and then stored.
-define i32 @test_zr_spill_copyprop1(i1 %c) {
-; CHECK-LABEL: test_zr_spill_copyprop1:
-entry:
-  br i1 %c, label %if.else, label %if.then
-
-if.else:
-; CHECK: bl bar
-; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
-  %call1 = tail call i32 @bar()
-  br label %if.end
-
-if.then:
-; CHECK: bl baz
-; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
-  %call2 = tail call i32 @baz()
-  br label %if.end
-
-if.end:
-  %x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
-  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
-  ret i32 %x.0
-}
-
-; Similar to test_zr_spill_copyprop1, but with mis-matched register
-; class between %x.0 and the 0 from %if.then.
-define i32 @test_zr_spill_copyprop2(i1 %c) {
-; CHECK-LABEL: test_zr_spill_copyprop2:
-entry:
-  br i1 %c, label %if.else, label %if.then
-
-if.else:
-; CHECK: bl bar
-; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
-  %call1 = tail call i32 @bar()
-  br label %if.end
-
-if.then:
-; CHECK: bl baz
-; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
-  %call2 = tail call i32 @baz()
-  br label %if.end
-
-if.end:
-  %x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
-  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
-  %x.1 = add i32 %x.0, 1
-  ret i32 %x.1
-}




More information about the llvm-commits mailing list