[llvm] ef4282e - [AArch64][GlobalISel] Avoid copies to target register bank for subregister copies

Thu Mar 5 11:13:21 PST 2020

Author: Jessica Paquette
Date: 2020-03-05T11:13:02-08:00
New Revision: ef4282e0ee2d6539bbb92447a7464d9d089409f1

URL: https://github.com/llvm/llvm-project/commit/ef4282e0ee2d6539bbb92447a7464d9d089409f1
DIFF: https://github.com/llvm/llvm-project/commit/ef4282e0ee2d6539bbb92447a7464d9d089409f1.diff

LOG: [AArch64][GlobalISel] Avoid copies to target register bank for subregister copies

Previously for any copy from a register bigger than the destination:

1. Copied to a same-sized register in the destination register bank.
2. Subregister copy of that to the destination.
This fails for copies from 128-bit FPRs to GPRs because the GPR register bank
can't accommodate 128-bit values.

Instead of special-casing such copies to perform the truncation beforehand in
the source register bank, generalize this:
a) Perform a subregister copy straight from source register whenever possible.
This results in shorter MIR and fixes the above problem.

b) Perform a full copy to target bank and then do a subregister copy only if
source bank can't support target's size. E.g. GPR to 8-bit FPR copy.

Patch by Raul Tambre (tambre)!

Differential Revision: https://reviews.llvm.org/D75421

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
    llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
    llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
    llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 2ec12effb8f6..ea94aca3e2f5 100644

--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -448,6 +448,18 @@ static bool getSubRegForClass(const TargetRegisterClass *RC,
   return true;
 }
 
+/// Returns the minimum size, in bits, that the given register bank can hold.
+static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
+  switch (RB.getID()) {
+  case AArch64::GPRRegBankID:
+    return 32;
+  case AArch64::FPRRegBankID:
+    return 8;
+  default:
+    llvm_unreachable("Tried to get minimum size for unknown register bank.");
+  }
+}
+
 /// Check whether \p I is a currently unsupported binary operation:
 /// - it has an unsized type
 /// - an operand is not a vreg
@@ -636,23 +648,20 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
 }
 #endif
 
-/// Helper function for selectCopy. Inserts a subregister copy from
-/// \p *From to \p *To, linking it up to \p I.
-///
-/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
+/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
+/// to \p *To.
 ///
-/// CopyReg (From class) = COPY SrcReg
-/// SubRegCopy (To class) = COPY CopyReg:SubReg
-/// Dst = COPY SubRegCopy
-static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
-                                  const RegisterBankInfo &RBI, Register SrcReg,
-                                  const TargetRegisterClass *From,
-                                  const TargetRegisterClass *To,
-                                  unsigned SubReg) {
+/// E.g "To = COPY SrcReg:SubReg"
+static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
+                       const RegisterBankInfo &RBI, Register SrcReg,
+                       const TargetRegisterClass *To, unsigned SubReg) {
+  assert(SrcReg.isValid() && "Expected a valid source register?");
+  assert(To && "Destination register class cannot be null");
+  assert(SubReg && "Expected a valid subregister");
+
   MachineIRBuilder MIB(I);
-  auto Copy = MIB.buildCopy({From}, {SrcReg});
-  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
-                        .addReg(Copy.getReg(0), 0, SubReg);
+  auto SubRegCopy =
+      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
   MachineOperand &RegOp = I.getOperand(1);
   RegOp.setReg(SubRegCopy.getReg(0));
 
@@ -747,25 +756,28 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
     unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
     unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
 
-    // If we're doing a cross-bank copy on different-sized registers, we need
-    // to do a bit more work.
+    // If the source register is bigger than the destination we need to perform
+    // a subregister copy.
     if (SrcSize > DstSize) {
-      // We're doing a cross-bank copy into a smaller register. We need a
-      // subregister copy. First, get a register class that's on the same bank
-      // as the destination, but the same size as the source.
-      const TargetRegisterClass *SubregRC =
-          getMinClassForRegBank(DstRegBank, SrcSize, true);
-      assert(SubregRC && "Didn't get a register class for subreg?");
-
-      // Get the appropriate subregister for the destination.
       unsigned SubReg = 0;
-      if (!getSubRegForClass(DstRC, TRI, SubReg)) {
-        LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
-        return false;
+
+      // If the source bank doesn't support a subregister copy small enough,
+      // then we first need to copy to the destination bank.
+      if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
+        const TargetRegisterClass *SubregRC = getMinClassForRegBank(
+            DstRegBank, SrcSize, /* GetAllRegSet = */ true);
+        getSubRegForClass(DstRC, TRI, SubReg);
+
+        MachineIRBuilder MIB(I);
+        auto Copy = MIB.buildCopy({SubregRC}, {SrcReg});
+        copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
+      } else {
+        const TargetRegisterClass *SubregRC = getMinClassForRegBank(
+            SrcRegBank, DstSize, /* GetAllRegSet = */ true);
+        getSubRegForClass(SubregRC, TRI, SubReg);
+        copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
       }
 
-      // Now, insert a subregister copy using the new register class.
-      selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
       return CheckCopy();
     }
 

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
index e7c95fbe1063..6b4b51d37ca8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
@@ -412,10 +412,9 @@ body:             |
     ; CHECK: liveins: $x0
     ; CHECK: %base:gpr64sp = COPY $x0
     ; CHECK: %imp:gpr64 = IMPLICIT_DEF
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %imp
-    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
-    ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY2]], 0, 1 :: (load 8)
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %imp.sub_32
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY1]], 0, 1 :: (load 8)
     ; CHECK: $x1 = COPY %load
     ; CHECK: RET_ReallyLR implicit $x1
     %base:gpr(p0) = COPY $x0

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
index bd65bfc01741..f74f87b2f67f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
@@ -85,10 +85,9 @@ body:             |
   ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK:   %copy:gpr64 = COPY $x0
   ; CHECK:   %fold_me:gpr64sp = ANDXri %copy, 4098
-  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
-  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
-  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
-  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 3, %bb.1
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
index af8e03be913b..977bb5a64cf5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -113,10 +113,9 @@ body:             |
   ; CHECK:   %copy:gpr32 = COPY $w0
   ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
   ; CHECK:   %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
-  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %zext
-  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
-  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
-  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %zext.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 3, %bb.1
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   $x0 = COPY %zext

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
index c71822475786..d236a6c5ce59 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
@@ -49,10 +49,9 @@ body:             |
   ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK:   %copy:gpr64 = COPY $x0
   ; CHECK:   %fold_me:gpr64 = UBFMXri %copy, 59, 58
-  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
-  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
-  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
-  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 3, %bb.1
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR
@@ -87,10 +86,9 @@ body:             |
   ; CHECK:   %copy:gpr64 = COPY $x0
   ; CHECK:   %fold_cst:gpr64 = MOVi64imm -5
   ; CHECK:   %fold_me:gpr64 = LSLVXr %copy, %fold_cst
-  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
-  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
-  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
-  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 3, %bb.1
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR
@@ -125,10 +123,9 @@ body:             |
   ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK:   %copy:gpr64 = COPY $x0
   ; CHECK:   %shl:gpr64 = UBFMXri %copy, 62, 61
-  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %shl
-  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
-  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
-  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %shl.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 3, %bb.1
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   %second_use:gpr64sp = ORRXri %shl, 8000

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
new file mode 100644
index 000000000000..efb999909ccc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
@@ -0,0 +1,36 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+---
+name:              test_128_fpr_truncation
+alignment:         4
+legalized:         true
+regBankSelected:   true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: test_128_fpr_truncation
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+  ; CHECK:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRQui]].ssub
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+  ; CHECK:   TBNZW [[COPY2]], 0, %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    liveins: $x0
+    %1:gpr(p0) = COPY $x0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %5:gpr(s64) = G_CONSTANT i64 0
+    %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16)
+    %2:fpr(s64) = G_TRUNC %0:fpr(s128)
+    %8:gpr(s64) = COPY %2:fpr(s64)
+    %4:gpr(s64) = G_AND %8:gpr, %3:gpr
+    %7:gpr(s32) = G_ICMP intpred(ne), %4:gpr(s64), %5:gpr
+    %6:gpr(s1) = G_TRUNC %7:gpr(s32)
+    G_BRCOND %6:gpr(s1), %bb.1
+
+  bb.1:
+    RET_ReallyLR
+...