[llvm] r356213 - [AArch64][GlobalISel] Implement selection for G_UNMERGE of vectors to vectors.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 14 15:48:19 PDT 2019
Author: aemerson
Date: Thu Mar 14 15:48:18 2019
New Revision: 356213
URL: http://llvm.org/viewvc/llvm-project?rev=356213&view=rev
Log:
[AArch64][GlobalISel] Implement selection for G_UNMERGE of vectors to vectors.
This reuses the previous support for extract vector elt (G_EXTRACT_VECTOR_ELT) to extract the
sub-vectors.
Differential Revision: https://reviews.llvm.org/D59390
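
Before reading the diff, a condensed sketch of the new selection path: a G_UNMERGE_VALUES whose destinations are vectors is handled by treating each destination sub-vector as a lane of the wide source and pulling it out with the emitExtractVectorElt helper factored out of G_EXTRACT_VECTOR_ELT selection. The sketch below is illustrative rather than the literal patch (it drops the AArch64InstructionSelector class context and the debug output), but it only uses calls that appear in the diff:

  // Condensed sketch of selectSplitVectorUnmerge; see the patch below for
  // the real member function and its error handling.
  bool selectSplitVectorUnmergeSketch(MachineInstr &I,
                                      MachineRegisterInfo &MRI) {
    // G_UNMERGE_VALUES lists its destinations first; the last operand is the
    // wide source vector.
    unsigned NumElts = I.getNumOperands() - 1;
    unsigned SrcReg = I.getOperand(NumElts).getReg();
    const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());

    MachineIRBuilder MIB(I);
    const RegisterBank &DstRB =
        *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);

    // Each destination sub-vector is extracted as if it were a scalar lane of
    // the source: lane 0 becomes a plain subregister COPY, later lanes become
    // CPYi* lane copies.
    for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
      if (!emitExtractVectorElt(I.getOperand(OpIdx).getReg(), DstRB, NarrowTy,
                                SrcReg, OpIdx, MIB))
        return false;
    }
    I.eraseFromParent();
    return true;
  }

For the <4 x s32> -> 2 x <2 x s32> case this produces one COPY of dsub plus one CPYi64, which is what the new CHECK lines in select-unmerge.mir expect.
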
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp?rev=356213&r1=356212&r2=356213&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp Thu Mar 14 15:48:18 2019
@@ -93,6 +93,8 @@ private:
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectSplitVectorUnmerge(MachineInstr &I,
+ MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
@@ -102,6 +104,10 @@ private:
MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
unsigned Op2,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
+ const RegisterBank &DstRB, LLT ScalarTy,
+ unsigned VecReg, unsigned LaneIdx,
+ MachineIRBuilder &MIRBuilder) const;
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
@@ -1870,6 +1876,68 @@ static bool getConstantValueForReg(unsig
return true;
}
+MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
+ Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
+ unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ unsigned CopyOpc = 0;
+ unsigned ExtractSubReg = 0;
+ if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
+ LLVM_DEBUG(
+ dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
+ return nullptr;
+ }
+
+ const TargetRegisterClass *DstRC =
+ getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
+ if (!DstRC) {
+ LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
+ return nullptr;
+ }
+
+ const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
+ const LLT &VecTy = MRI.getType(VecReg);
+ const TargetRegisterClass *VecRC =
+ getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
+ if (!VecRC) {
+ LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+ return nullptr;
+ }
+
+ // The register that we're going to copy into.
+ unsigned InsertReg = VecReg;
+ if (!DstReg)
+ DstReg = MRI.createVirtualRegister(DstRC);
+ // If the lane index is 0, we just use a subregister COPY.
+ if (LaneIdx == 0) {
+ auto CopyMI =
+ BuildMI(MIRBuilder.getMBB(), MIRBuilder.getInsertPt(),
+ MIRBuilder.getDL(), TII.get(TargetOpcode::COPY), *DstReg)
+ .addUse(VecReg, 0, ExtractSubReg);
+ RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
+ return &*CopyMI;
+ }
+
+ // Lane copies require 128-bit wide registers. If we're dealing with an
+ // unpacked vector, then we need to move up to that width. Insert an implicit
+ // def and a subregister insert to get us there.
+ if (VecTy.getSizeInBits() != 128) {
+ MachineInstr *ScalarToVector = emitScalarToVector(
+ VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
+ if (!ScalarToVector)
+ return nullptr;
+ InsertReg = ScalarToVector->getOperand(0).getReg();
+ }
+
+ MachineInstr *LaneCopyMI =
+ MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
+ constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
+
+ // Make sure that we actually constrain the initial copy.
+ RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
+ return LaneCopyMI;
+}
+
bool AArch64InstructionSelector::selectExtractElt(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
@@ -1878,7 +1946,7 @@ bool AArch64InstructionSelector::selectE
const LLT NarrowTy = MRI.getType(DstReg);
const unsigned SrcReg = I.getOperand(1).getReg();
const LLT WideTy = MRI.getType(SrcReg);
-
+ (void)WideTy;
assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
"source register size too small!");
assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
@@ -1897,63 +1965,44 @@ bool AArch64InstructionSelector::selectE
if (!getConstantValueForReg(LaneIdxOp.getReg(), MRI, LaneIdx))
return false;
- unsigned CopyOpc = 0;
- unsigned ExtractSubReg = 0;
- if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) {
- LLVM_DEBUG(
- dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
- return false;
- }
+ MachineIRBuilder MIRBuilder(I);
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
- const TargetRegisterClass *DstRC =
- getRegClassForTypeOnBank(NarrowTy, DstRB, RBI, true);
- if (!DstRC) {
- LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
+ MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
+ LaneIdx, MIRBuilder);
+ if (!Extract)
return false;
- }
- const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
- const TargetRegisterClass *SrcRC =
- getRegClassForTypeOnBank(WideTy, SrcRB, RBI, true);
- if (!SrcRC) {
- LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
- return false;
- }
+ I.eraseFromParent();
+ return true;
+}
- // The register that we're going to copy into.
- unsigned InsertReg = SrcReg;
- MachineIRBuilder MIRBuilder(I);
+bool AArch64InstructionSelector::selectSplitVectorUnmerge(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ unsigned NumElts = I.getNumOperands() - 1;
+ unsigned SrcReg = I.getOperand(NumElts).getReg();
+ const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT SrcTy = MRI.getType(SrcReg);
- // If the lane index is 0, we just use a subregister COPY.
- if (LaneIdx == 0) {
- unsigned CopyTo = I.getOperand(0).getReg();
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
- CopyTo)
- .addUse(SrcReg, 0, ExtractSubReg);
- RBI.constrainGenericRegister(CopyTo, *DstRC, MRI);
- I.eraseFromParent();
- return true;
+ assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
+ if (SrcTy.getSizeInBits() > 128) {
+ LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
+ return false;
}
- // Lane copies require 128-bit wide registers. If we're dealing with an
- // unpacked vector, then we need to move up to that width. Insert an implicit
- // def and a subregister insert to get us there.
- if (WideTy.getSizeInBits() != 128) {
- MachineInstr *ScalarToVector = emitScalarToVector(
- WideTy.getSizeInBits(), &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
- if (!ScalarToVector)
+ MachineIRBuilder MIB(I);
+
+ // We implement a split vector operation by treating the sub-vectors as
+ // scalars and extracting them.
+ const RegisterBank &DstRB =
+ *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
+ for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
+ unsigned Dst = I.getOperand(OpIdx).getReg();
+ MachineInstr *Extract =
+ emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
+ if (!Extract)
return false;
- InsertReg = ScalarToVector->getOperand(0).getReg();
}
-
- MachineInstr *LaneCopyMI =
- MIRBuilder.buildInstr(CopyOpc, {DstReg}, {InsertReg}).addImm(LaneIdx);
- constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
-
- // Make sure that we actually constrain the initial copy.
- RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
-
I.eraseFromParent();
return true;
}
@@ -1984,11 +2033,8 @@ bool AArch64InstructionSelector::selectU
assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
"source register size too small!");
- // TODO: Handle unmerging into vectors.
- if (!NarrowTy.isScalar()) {
- LLVM_DEBUG(dbgs() << "Vector-to-vector unmerges not supported yet.\n");
- return false;
- }
+ if (!NarrowTy.isScalar())
+ return selectSplitVectorUnmerge(I, MRI);
// Choose a lane copy opcode and subregister based off of the size of the
// vector's elements.
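
One detail worth calling out before the test changes: CPYi* lane copies only accept a 128-bit source register, so for sub-128-bit sources emitExtractVectorElt first widens the vector via emitScalarToVector. A minimal annotated excerpt of that step from the hunk above (CopyOpc, DstReg, VecReg, VecTy and LaneIdx are the helper's locals/parameters):

  // For a source vector narrower than 128 bits, emitScalarToVector produces
  // an IMPLICIT_DEF of an FPR128 register followed by an INSERT_SUBREG of the
  // narrow source, and the lane copy then reads from that widened register.
  unsigned InsertReg = VecReg;
  if (VecTy.getSizeInBits() != 128) {
    MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
      return nullptr; // couldn't widen; give up on this instruction
    InsertReg = ScalarToVector->getOperand(0).getReg();
  }
  MachineInstr *LaneCopyMI =
      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);

This is the IMPLICIT_DEF + INSERT_SUBREG + CPYi32 sequence checked by the new <2 x s16> test below; the <2 x s32> test has a 128-bit source, so it skips the widening.
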
Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir?rev=356213&r1=356212&r2=356213&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir Thu Mar 14 15:48:18 2019
@@ -19,6 +19,14 @@
ret <8 x half> %a
}
+ define <2 x float> @test_vecsplit_2v2s32_v4s32(<4 x float> %a) {
+ ret <2 x float> undef
+ }
+
+ define <2 x half> @test_vecsplit_2v2s16_v4s16(<4 x half> %a) {
+ ret <2 x half> undef
+ }
+
...
---
name: test_v2s64_unmerge
@@ -152,3 +160,51 @@ body: |
$q0 = COPY %1(<8 x s16>)
RET_ReallyLR implicit $q0
...
+---
+name: test_vecsplit_2v2s32_v4s32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $q0
+ ; CHECK-LABEL: name: test_vecsplit_2v2s32_v4s32
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub
+ ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1
+ ; CHECK: $d0 = COPY [[COPY1]]
+ ; CHECK: $d1 = COPY [[CPYi64_]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(<4 x s32>) = COPY $q0
+ %1:fpr(<2 x s32>), %2:fpr(<2 x s32>) = G_UNMERGE_VALUES %0(<4 x s32>)
+ $d0 = COPY %1(<2 x s32>)
+ $d1 = COPY %2(<2 x s32>)
+ RET_ReallyLR implicit $d0
+...
+---
+name: test_vecsplit_2v2s16_v4s16
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $d0
+ ; CHECK-LABEL: name: test_vecsplit_2v2s16_v4s16
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1
+ ; CHECK: $s0 = COPY [[COPY1]]
+ ; CHECK: $s1 = COPY [[CPYi32_]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:fpr(<4 x s16>) = COPY $d0
+ %1:fpr(<2 x s16>), %2:fpr(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>)
+ $s0 = COPY %1(<2 x s16>)
+ $s1 = COPY %2(<2 x s16>)
+ RET_ReallyLR implicit $s0
+...