[llvm] ef4282e - [AArch64][GlobalISel] Avoid copies to target register bank for subregister copies
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 11:13:21 PST 2020
Author: Jessica Paquette
Date: 2020-03-05T11:13:02-08:00
New Revision: ef4282e0ee2d6539bbb92447a7464d9d089409f1
URL: https://github.com/llvm/llvm-project/commit/ef4282e0ee2d6539bbb92447a7464d9d089409f1
DIFF: https://github.com/llvm/llvm-project/commit/ef4282e0ee2d6539bbb92447a7464d9d089409f1.diff
LOG: [AArch64][GlobalISel] Avoid copies to target register bank for subregister copies
Previously, for any copy from a register bigger than the destination, we:
1. Copied to a same-sized register in the destination register bank.
2. Did a subregister copy of that to the destination.
This fails for copies from 128-bit FPRs to GPRs because the GPR register bank
can't accommodate 128-bit values.
Instead of special-casing such copies to perform the truncation beforehand in
the source register bank, generalize this:
a) Perform a subregister copy straight from source register whenever possible.
This results in shorter MIR and fixes the above problem.
b) Perform a full copy to target bank and then do a subregister copy only if
source bank can't support target's size. E.g. GPR to 8-bit FPR copy.
Patch by Raul Tambre (tambre)!
Differential Revision: https://reviews.llvm.org/D75421
Added:
llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
Modified:
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 2ec12effb8f6..ea94aca3e2f5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -448,6 +448,18 @@ static bool getSubRegForClass(const TargetRegisterClass *RC,
return true;
}
+/// Returns the minimum size the given register bank can hold.
+static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
+ switch (RB.getID()) {
+ case AArch64::GPRRegBankID:
+ return 32;
+ case AArch64::FPRRegBankID:
+ return 8;
+ default:
+ llvm_unreachable("Tried to get minimum size for unknown register bank.");
+ }
+}
+
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
@@ -636,23 +648,20 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
}
#endif
-/// Helper function for selectCopy. Inserts a subregister copy from
-/// \p *From to \p *To, linking it up to \p I.
-///
-/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
+/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
+/// to \p *To.
///
-/// CopyReg (From class) = COPY SrcReg
-/// SubRegCopy (To class) = COPY CopyReg:SubReg
-/// Dst = COPY SubRegCopy
-static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
- const RegisterBankInfo &RBI, Register SrcReg,
- const TargetRegisterClass *From,
- const TargetRegisterClass *To,
- unsigned SubReg) {
+/// E.g "To = COPY SrcReg:SubReg"
+static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
+ const RegisterBankInfo &RBI, Register SrcReg,
+ const TargetRegisterClass *To, unsigned SubReg) {
+ assert(SrcReg.isValid() && "Expected a valid source register?");
+ assert(To && "Destination register class cannot be null");
+ assert(SubReg && "Expected a valid subregister");
+
MachineIRBuilder MIB(I);
- auto Copy = MIB.buildCopy({From}, {SrcReg});
- auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
- .addReg(Copy.getReg(0), 0, SubReg);
+ auto SubRegCopy =
+ MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(SubRegCopy.getReg(0));
@@ -747,25 +756,28 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
- // If we're doing a cross-bank copy on different-sized registers, we need
- // to do a bit more work.
+ // If the source register is bigger than the destination we need to perform
+ // a subregister copy.
if (SrcSize > DstSize) {
- // We're doing a cross-bank copy into a smaller register. We need a
- // subregister copy. First, get a register class that's on the same bank
- // as the destination, but the same size as the source.
- const TargetRegisterClass *SubregRC =
- getMinClassForRegBank(DstRegBank, SrcSize, true);
- assert(SubregRC && "Didn't get a register class for subreg?");
-
- // Get the appropriate subregister for the destination.
unsigned SubReg = 0;
- if (!getSubRegForClass(DstRC, TRI, SubReg)) {
- LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
- return false;
+
+ // If the source bank doesn't support a subregister copy small enough,
+ // then we first need to copy to the destination bank.
+ if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
+ const TargetRegisterClass *SubregRC = getMinClassForRegBank(
+ DstRegBank, SrcSize, /* GetAllRegSet = */ true);
+ getSubRegForClass(DstRC, TRI, SubReg);
+
+ MachineIRBuilder MIB(I);
+ auto Copy = MIB.buildCopy({SubregRC}, {SrcReg});
+ copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
+ } else {
+ const TargetRegisterClass *SubregRC = getMinClassForRegBank(
+ SrcRegBank, DstSize, /* GetAllRegSet = */ true);
+ getSubRegForClass(SubregRC, TRI, SubReg);
+ copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
}
- // Now, insert a subregister copy using the new register class.
- selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
return CheckCopy();
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
index e7c95fbe1063..6b4b51d37ca8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
@@ -412,10 +412,9 @@ body: |
; CHECK: liveins: $x0
; CHECK: %base:gpr64sp = COPY $x0
; CHECK: %imp:gpr64 = IMPLICIT_DEF
- ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %imp
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
- ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY2]], 0, 1 :: (load 8)
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %imp.sub_32
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+ ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY1]], 0, 1 :: (load 8)
; CHECK: $x1 = COPY %load
; CHECK: RET_ReallyLR implicit $x1
%base:gpr(p0) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
index bd65bfc01741..f74f87b2f67f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
@@ -85,10 +85,9 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: %fold_me:gpr64sp = ANDXri %copy, 4098
- ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
- ; CHECK: TBNZW [[COPY2]], 3, %bb.1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+ ; CHECK: TBNZW [[COPY1]], 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
index af8e03be913b..977bb5a64cf5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -113,10 +113,9 @@ body: |
; CHECK: %copy:gpr32 = COPY $w0
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
- ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %zext
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
- ; CHECK: TBNZW [[COPY2]], 3, %bb.1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %zext.sub_32
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+ ; CHECK: TBNZW [[COPY1]], 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: $x0 = COPY %zext
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
index c71822475786..d236a6c5ce59 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
@@ -49,10 +49,9 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: %fold_me:gpr64 = UBFMXri %copy, 59, 58
- ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
- ; CHECK: TBNZW [[COPY2]], 3, %bb.1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+ ; CHECK: TBNZW [[COPY1]], 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
@@ -87,10 +86,9 @@ body: |
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: %fold_cst:gpr64 = MOVi64imm -5
; CHECK: %fold_me:gpr64 = LSLVXr %copy, %fold_cst
- ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
- ; CHECK: TBNZW [[COPY2]], 3, %bb.1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+ ; CHECK: TBNZW [[COPY1]], 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
@@ -125,10 +123,9 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: %shl:gpr64 = UBFMXri %copy, 62, 61
- ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %shl
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
- ; CHECK: TBNZW [[COPY2]], 3, %bb.1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %shl.sub_32
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+ ; CHECK: TBNZW [[COPY1]], 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: %second_use:gpr64sp = ORRXri %shl, 8000
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
new file mode 100644
index 000000000000..efb999909ccc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
@@ -0,0 +1,36 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: test_128_fpr_truncation
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_128_fpr_truncation
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $x0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRQui]].ssub
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+ ; CHECK: TBNZW [[COPY2]], 0, %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ liveins: $x0
+ %1:gpr(p0) = COPY $x0
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %5:gpr(s64) = G_CONSTANT i64 0
+ %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16)
+ %2:fpr(s64) = G_TRUNC %0:fpr(s128)
+ %8:gpr(s64) = COPY %2:fpr(s64)
+ %4:gpr(s64) = G_AND %8:gpr, %3:gpr
+ %7:gpr(s32) = G_ICMP intpred(ne), %4:gpr(s64), %5:gpr
+ %6:gpr(s1) = G_TRUNC %7:gpr(s32)
+ G_BRCOND %6:gpr(s1), %bb.1
+
+ bb.1:
+ RET_ReallyLR
+...
More information about the llvm-commits
mailing list