[llvm] r355871 - Recommit "[GlobalISel][AArch64] Add selection support for G_EXTRACT_VECTOR_ELT"
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 11 15:18:01 PDT 2019
Author: paquette
Date: Mon Mar 11 15:18:01 2019
New Revision: 355871
URL: http://llvm.org/viewvc/llvm-project?rev=355871&view=rev
Log:
Recommit "[GlobalISel][AArch64] Add selection support for G_EXTRACT_VECTOR_ELT"
After r355865, we should be able to safely select G_EXTRACT_VECTOR_ELT without
running into any problematic intrinsics.
Also add a fix for lane copies, which don't support index 0.
Added:
llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir
llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp?rev=355871&r1=355870&r2=355871&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp Mon Mar 11 15:18:01 2019
@@ -78,6 +78,7 @@ private:
void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
SmallVectorImpl<int> &Idxs) const;
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
@@ -1709,6 +1710,8 @@ bool AArch64InstructionSelector::select(
return selectUnmergeValues(I, MRI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return selectShuffleVector(I, MRI);
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return selectExtractElt(I, MRI);
}
return false;
@@ -1787,6 +1790,138 @@ bool AArch64InstructionSelector::selectM
return true;
}
+static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
+ const unsigned EltSize) {
+ // Map the element size in bits to the matching lane copy opcode (CPYi*) and
+ // subregister index (hsub/ssub/dsub), returned through the out-parameters.
+ switch (EltSize) {
+ case 16:
+ CopyOpc = AArch64::CPYi16;
+ ExtractSubReg = AArch64::hsub;
+ break;
+ case 32:
+ CopyOpc = AArch64::CPYi32;
+ ExtractSubReg = AArch64::ssub;
+ break;
+ case 64:
+ CopyOpc = AArch64::CPYi64;
+ ExtractSubReg = AArch64::dsub;
+ break;
+ default:
+ // Any other size is unsupported: fail without touching the out-parameters.
+ LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
+ return false;
+ }
+ return true;
+}
+
+bool AArch64InstructionSelector::selectExtractElt(
+ MachineInstr &I, MachineRegisterInfo &MRI) const { // True iff I was selected and erased.
+ assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
+ "unexpected opcode!");
+ unsigned DstReg = I.getOperand(0).getReg();
+ const LLT NarrowTy = MRI.getType(DstReg);
+ const unsigned SrcReg = I.getOperand(1).getReg();
+ const LLT WideTy = MRI.getType(SrcReg);
+
+ assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
+ "source register size too small!");
+ assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
+
+ // The lane index must be a register; its defining constant is looked up below.
+ MachineOperand &LaneIdxOp = I.getOperand(2);
+ assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
+
+ if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
+ LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
+ return false;
+ }
+
+ // Walk back from the lane index register to the instruction that ultimately
+ // defines it, looking through any chain of COPYs in between. The loop body is
+ // intentionally empty; all the work happens in the loop header.
+ MachineInstr *LaneDefInst = nullptr;
+ for (LaneDefInst = MRI.getVRegDef(LaneIdxOp.getReg());
+ LaneDefInst && LaneDefInst->isCopy();
+ LaneDefInst = MRI.getVRegDef(LaneDefInst->getOperand(1).getReg())) {
+ }
+
+ // A null def means some register in the chain had no definition. Bail.
+ if (!LaneDefInst) {
+ LLVM_DEBUG(dbgs() << "Did not find VReg definition for " << LaneIdxOp
+ << "\n");
+ return false;
+ }
+
+ // TODO: Handle extracts that don't use G_CONSTANT.
+ if (LaneDefInst->getOpcode() != TargetOpcode::G_CONSTANT) {
+ LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT "
+ "currently unsupported.\n");
+ return false;
+ }
+ // NOTE(review): LaneIdx is never checked against WideTy's lane count.
+ unsigned LaneIdx = LaneDefInst->getOperand(1).getCImm()->getLimitedValue();
+ unsigned CopyOpc = 0;
+ unsigned ExtractSubReg = 0;
+ if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) {
+ LLVM_DEBUG(
+ dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
+ return false;
+ }
+
+ const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ const TargetRegisterClass *DstRC =
+ getRegClassForTypeOnBank(NarrowTy, DstRB, RBI, true);
+ if (!DstRC) {
+ LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
+ return false;
+ }
+
+ const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+ const TargetRegisterClass *SrcRC =
+ getRegClassForTypeOnBank(WideTy, SrcRB, RBI, true);
+ if (!SrcRC) {
+ LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+ return false;
+ }
+
+ // Source operand of the lane copy; replaced below if SrcReg must be widened.
+ unsigned InsertReg = SrcReg;
+ MachineIRBuilder MIRBuilder(I);
+
+ // Lane copies don't support index 0; use a subregister COPY for that lane.
+ if (LaneIdx == 0) {
+ unsigned CopyTo = I.getOperand(0).getReg(); // Same register as DstReg.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
+ CopyTo)
+ .addUse(SrcReg, 0, ExtractSubReg);
+ RBI.constrainGenericRegister(CopyTo, *DstRC, MRI);
+ I.eraseFromParent();
+ return true;
+ }
+
+ // Lane copies require 128-bit wide registers. If the source vector is
+ // narrower than that, first widen it into an FPR128 register via
+ // emitScalarToVector and read the lane from the widened register instead.
+ if (WideTy.getSizeInBits() != 128) {
+ MachineInstr *ScalarToVector = emitScalarToVector(
+ WideTy.getSizeInBits(), &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
+ if (!ScalarToVector)
+ return false;
+ InsertReg = ScalarToVector->getOperand(0).getReg();
+ }
+
+ MachineInstr *LaneCopyMI =
+ MIRBuilder.buildInstr(CopyOpc, {DstReg}, {InsertReg}).addImm(LaneIdx);
+ constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
+
+ // Constrain the destination register to the class picked for NarrowTy.
+ RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
+
+ I.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::selectUnmergeValues(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
@@ -1823,24 +1958,8 @@ bool AArch64InstructionSelector::selectU
// vector's elements.
unsigned CopyOpc = 0;
unsigned ExtractSubReg = 0;
- switch (NarrowTy.getSizeInBits()) {
- case 16:
- CopyOpc = AArch64::CPYi16;
- ExtractSubReg = AArch64::hsub;
- break;
- case 32:
- CopyOpc = AArch64::CPYi32;
- ExtractSubReg = AArch64::ssub;
- break;
- case 64:
- CopyOpc = AArch64::CPYi64;
- ExtractSubReg = AArch64::dsub;
- break;
- default:
- // Unknown size, bail out.
- LLVM_DEBUG(dbgs() << "NarrowTy had unsupported size.\n");
+ if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
return false;
- }
// Set up for the lane copies.
MachineBasicBlock &MBB = *I.getParent();
Modified: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp?rev=355871&r1=355870&r2=355871&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp Mon Mar 11 15:18:01 2019
@@ -446,7 +446,8 @@ AArch64LegalizerInfo::AArch64LegalizerIn
.minScalar(2, s64)
.legalIf([=](const LegalityQuery &Query) {
const LLT &VecTy = Query.Types[1];
- return VecTy == v4s32 || VecTy == v2s64;
+ return VecTy == v2s16 || VecTy == v4s16 || VecTy == v4s32 ||
+ VecTy == v2s64 || VecTy == v2s32;
});
getActionDefinitionsBuilder(G_BUILD_VECTOR)
Modified: llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp?rev=355871&r1=355870&r2=355871&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp Mon Mar 11 15:18:01 2019
@@ -689,6 +689,14 @@ AArch64RegisterBankInfo::getInstrMapping
}
break;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ // Destination and source need to be FPRs.
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ OpRegBankIdx[1] = PMI_FirstFPR;
+
+ // Index needs to be a GPR.
+ OpRegBankIdx[2] = PMI_FirstGPR;
+ break;
case TargetOpcode::G_BUILD_VECTOR:
// If the first source operand belongs to a FPR register bank, then make
Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir?rev=355871&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir Mon Mar 11 15:18:01 2019
@@ -0,0 +1,103 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=regbankselect %s -o - | FileCheck %s
+
+name: v2s32_fpr
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1.entry:
+ liveins: $d0
+
+ %0:_(<2 x s32>) = COPY $d0
+ %2:_(s64) = G_CONSTANT i64 1
+ %1:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %2(s64)
+ $s0 = COPY %1(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: v4s32_gpr
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1.entry:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: v4s32_gpr
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0
+ ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0
+ ; CHECK: [[EVEC:%[0-9]+]]:fpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK: $s0 = COPY [[EVEC]](s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:_(<4 x s32>) = COPY $q0
+ %2:_(s64) = G_CONSTANT i64 0
+ %1:_(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %2(s64)
+ $s0 = COPY %1(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: v2s64_fpr
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1.entry:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: v2s64_fpr
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
+ ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 2
+ ; CHECK: [[EVEC:%[0-9]+]]:fpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+ ; CHECK: $d0 = COPY [[EVEC]](s64)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<2 x s64>) = COPY $q0
+ %2:_(s64) = G_CONSTANT i64 2
+ %1:_(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %2(s64)
+ $d0 = COPY %1(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: v4s16_fpr
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1.entry:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: v4s16_fpr
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s16>) = COPY $d0
+ ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1
+ ; CHECK: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[C]](s64)
+ ; CHECK: $h0 = COPY [[EVEC]](s16)
+ ; CHECK: RET_ReallyLR implicit $h0
+ %0:_(<4 x s16>) = COPY $d0
+ %2:_(s64) = G_CONSTANT i64 1
+ %1:_(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %2(s64)
+ $h0 = COPY %1(s16)
+ RET_ReallyLR implicit $h0
+
+...
Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir?rev=355871&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir Mon Mar 11 15:18:01 2019
@@ -0,0 +1,117 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name: v2s32_fpr
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: gpr }
+ - { id: 3, class: fpr }
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: v2s32_fpr
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1
+ ; CHECK: $s0 = COPY [[CPYi32_]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:fpr(<2 x s32>) = COPY $d0
+ %2:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s64) = COPY %2(s64)
+ %1:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
+ $s0 = COPY %1(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: v2s32_fpr_idx0
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+ ; CHECK-LABEL: name: v2s32_fpr_idx0
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
+ ; CHECK: $s0 = COPY [[COPY1]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:fpr(<2 x s32>) = COPY $d0
+ %2:gpr(s64) = G_CONSTANT i64 0
+ %3:fpr(s64) = COPY %2(s64)
+ %1:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
+ $s0 = COPY %1(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: v2s64_fpr
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: gpr }
+ - { id: 3, class: fpr }
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: v2s64_fpr
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 2
+ ; CHECK: $d0 = COPY [[CPYi64_]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(<2 x s64>) = COPY $q0
+ %2:gpr(s64) = G_CONSTANT i64 2
+ %3:fpr(s64) = COPY %2(s64)
+ %1:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %3(s64)
+ $d0 = COPY %1(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: v4s16_fpr
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: gpr }
+ - { id: 3, class: fpr }
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: v4s16_fpr
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1
+ ; CHECK: $h0 = COPY [[CPYi16_]]
+ ; CHECK: RET_ReallyLR implicit $h0
+ %0:fpr(<4 x s16>) = COPY $d0
+ %2:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s64) = COPY %2(s64)
+ %1:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %3(s64)
+ $h0 = COPY %1(s16)
+ RET_ReallyLR implicit $h0
+
+...
More information about the llvm-commits
mailing list