[llvm] a86bbe1 - [AArch64][GlobalISel] Handle any-extending FPR loads in manual selection code.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 1 10:19:41 PDT 2021
Author: Amara Emerson
Date: 2021-09-01T10:19:22-07:00
New Revision: a86bbe1e3191800d42abf073a060eb8601b8be37
URL: https://github.com/llvm/llvm-project/commit/a86bbe1e3191800d42abf073a060eb8601b8be37
DIFF: https://github.com/llvm/llvm-project/commit/a86bbe1e3191800d42abf073a060eb8601b8be37.diff
LOG: [AArch64][GlobalISel] Handle any-extending FPR loads in manual selection code.
When we have an any-extending FPR bank load, none of the tablegen patterns
match and we fall back to the C++ selector. As with the truncating stores
that were fixed recently, the C++ selector wasn't able to handle it and ended
up generating invalid copies between register classes of different sizes.
This change adds handling for this case, splitting the load into a regular
load of the memory-sized type followed by a SUBREG_TO_REG that extends the
result into the original, wider destination register.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index c2951c4f6ecb2..930f836719259 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2764,6 +2764,30 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
.getReg(0);
RBI.constrainGenericRegister(Copy, *RC, MRI);
LdSt.getOperand(0).setReg(Copy);
+ } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+ // If this is an any-extending load from the FPR bank, split it into a regular
+ // load + extend.
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ unsigned SubReg;
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+ Register OldDst = LdSt.getReg(0);
+ Register NewDst =
+ MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
+ LdSt.getOperand(0).setReg(NewDst);
+ MRI.setRegBank(NewDst, RB);
+ // Generate a SUBREG_TO_REG to extend it.
+ MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
+ .addImm(0)
+ .addUse(NewDst)
+ .addImm(SubReg);
+ auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
+ RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
+ MIB.setInstr(LdSt);
+ }
}
// Helper lambda for partially selecting I. Either returns the original
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
index 4339005ea7f99..f313e08178dbb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -38,6 +38,8 @@
define void @load_4xi32(<4 x i32>* %ptr) { ret void }
define void @load_8xi16(<8 x i16>* %ptr) { ret void }
define void @load_16xi8(<16 x i8>* %ptr) { ret void }
+ define void @anyext_on_fpr() { ret void }
+ define void @anyext_on_fpr8() { ret void }
...
@@ -638,3 +640,69 @@ body: |
RET_ReallyLR implicit $q0
...
+---
+name: anyext_on_fpr
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+ - { reg: '$w3' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $w3, $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: anyext_on_fpr
+ ; CHECK: liveins: $w3, $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16))
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRHui]], %subreg.hsub
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK: $w0 = COPY [[COPY1]]
+ ; CHECK: RET_ReallyLR
+ %0:gpr(p0) = COPY $x0
+ %16:fpr(s32) = G_LOAD %0(p0) :: (load (s16))
+ %24:gpr(s32) = COPY %16(s32)
+ $w0 = COPY %24(s32)
+ RET_ReallyLR
+
+...
+---
+name: anyext_on_fpr8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+ - { reg: '$w3' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $w3, $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: anyext_on_fpr8
+ ; CHECK: liveins: $w3, $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8))
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRBui]], %subreg.bsub
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK: $w0 = COPY [[COPY1]]
+ ; CHECK: RET_ReallyLR
+ %0:gpr(p0) = COPY $x0
+ %16:fpr(s32) = G_LOAD %0(p0) :: (load (s8))
+ %24:gpr(s32) = COPY %16(s32)
+ $w0 = COPY %24(s32)
+ RET_ReallyLR
+
+...
More information about the llvm-commits mailing list