[llvm] r356526 - [AArch64][GlobalISel] Add an optimization to select vector DUP instructions.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 14:43:06 PDT 2019
Author: aemerson
Date: Tue Mar 19 14:43:05 2019
New Revision: 356526
URL: http://llvm.org/viewvc/llvm-project?rev=356526&view=rev
Log:
[AArch64][GlobalISel] Add an optimization to select vector DUP instructions.
This adds pattern matching for the insert+shufflevector sequence so we can
generate dup instructions instead of the current TBL sequence.
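For example, a splat written at the IR level as follows (a minimal sketch of
the kind of input the IRTranslator lowers to the G_INSERT_VECTOR_ELT +
G_SHUFFLE_VECTOR sequence matched below; names are illustrative):

  define <4 x i32> @splat_4xi32(i32 %x) {
    %ins = insertelement <4 x i32> undef, i32 %x, i32 0
    %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
    ret <4 x i32> %splat
  }

now selects to a single "dup v0.4s, w0" instead of materializing a constant
mask and going through TBL.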
Differential Revision: https://reviews.llvm.org/D59558
Added:
llvm/trunk/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp?rev=356526&r1=356525&r2=356526&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp Tue Mar 19 14:43:05 2019
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -143,6 +144,21 @@ private:
   void materializeLargeCMVal(MachineInstr &I, const Value *V,
                              unsigned char OpFlags) const;
+  // Optimization methods.
+
+  // Helper function to check if a reg def is an MI with a given opcode; if
+  // so, returns the MI, and returns nullptr otherwise.
+  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
+                              MachineIRBuilder &MIB) const {
+    auto *Def = MIB.getMRI()->getVRegDef(Reg);
+    if (!Def || Def->getOpcode() != Opc)
+      return nullptr;
+    return Def;
+  }
+
+  bool tryOptVectorShuffle(MachineInstr &I) const;
+  bool tryOptVectorDup(MachineInstr &MI) const;
+
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
   const AArch64InstrInfo &TII;
@@ -2325,8 +2341,97 @@ MachineInstr *AArch64InstructionSelector
   return &*InsElt;
 }
 
+bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
+  // Try to match a vector splat operation into a dup instruction.
+  // We're looking for this pattern:
+  //   %scalar:gpr(s64) = COPY $x0
+  //   %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
+  //   %cst0:gpr(s32) = G_CONSTANT i32 0
+  //   %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
+  //   %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
+  //   %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
+  //                                            %zerovec(<2 x s32>)
+  //
+  // ...into:
+  //   %splat = DUP %scalar
+  // We use the regbank of the scalar to determine which kind of dup to use.
+  MachineIRBuilder MIB(I);
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+  using namespace TargetOpcode;
+  using namespace MIPatternMatch;
+
+  // Begin matching the insert.
+  auto *InsMI =
+      findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
+  if (!InsMI)
+    return false;
+  // Match the undef vector operand.
+  auto *UndefMI =
+      findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
+  if (!UndefMI)
+    return false;
+  // Match the scalar being splatted.
+  unsigned ScalarReg = InsMI->getOperand(2).getReg();
+  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
+  // Match the index constant 0.
+  int64_t Index = 0;
+  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
+    return false;
+
+  // The shuffle's second operand doesn't matter if the mask is all zero.
+  auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
+  if (!ZeroVec)
+    return false;
+  int64_t Zero = 0;
+  if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
+    return false;
+  for (unsigned i = 1, e = ZeroVec->getNumOperands(); i < e; ++i) {
+    if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
+      return false; // This wasn't an all zeros vector.
+  }
+
+  // We're done, now find out what kind of splat we need.
+  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
+  LLT EltTy = VecTy.getElementType();
+  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
+    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet\n");
+    return false;
+  }
+  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
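+  // OpcTable is indexed as [IsFP][element size == 64]: row 0 holds the
+  // GPR-source DUP variants and row 1 the lane-indexed (FPR-source)
+  // variants, with 32-bit element forms in column 0 and 64-bit in column 1.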
+  static const unsigned OpcTable[2][2] = {
+      {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
+      {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
+  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
+
+  // For FP splats, we need to widen the scalar reg via undef too.
+  if (IsFP) {
+    MachineInstr *Widen = emitScalarToVector(
+        EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
+    if (!Widen)
+      return false;
+    ScalarReg = Widen->getOperand(0).getReg();
+  }
+  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
+  if (IsFP)
+    Dup.addImm(0);
+  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
+  if (TM.getOptLevel() == CodeGenOpt::None)
+    return false;
+  if (tryOptVectorDup(I))
+    return true;
+  return false;
+}
+
 bool AArch64InstructionSelector::selectShuffleVector(
     MachineInstr &I, MachineRegisterInfo &MRI) const {
+  if (tryOptVectorShuffle(I))
+    return true;
   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
   unsigned Src1Reg = I.getOperand(1).getReg();
   const LLT Src1Ty = MRI.getType(Src1Reg);
Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir?rev=356526&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir Tue Mar 19 14:43:05 2019
@@ -0,0 +1,110 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -O1 -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: splat_4xi32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: splat_4xi32
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[DUPv4i32gpr:%[0-9]+]]:fpr128 = DUPv4i32gpr [[COPY]]
+    ; CHECK: $q0 = COPY [[DUPv4i32gpr]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:gpr(s32) = COPY $w0
+    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %5:fpr(<4 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32), %3(s32), %3(s32)
+    %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, %5(<4 x s32>)
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: splat_2xi64
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: splat_2xi64
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[DUPv2i64gpr:%[0-9]+]]:fpr128 = DUPv2i64gpr [[COPY]]
+    ; CHECK: $q0 = COPY [[DUPv2i64gpr]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:gpr(s64) = COPY $x0
+    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %5:fpr(<2 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32)
+    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, %5(<2 x s32>)
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: splat_4xf32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $s0
+
+    ; CHECK-LABEL: name: splat_4xf32
+    ; CHECK: liveins: $s0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
+    ; CHECK: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY [[DUPv4i32lane]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(s32) = COPY $s0
+    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %5:fpr(<4 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32), %3(s32), %3(s32)
+    %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, %5(<4 x s32>)
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: splat_2xf64
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: splat_2xf64
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(s64) = COPY $d0
+    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %5:fpr(<2 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32)
+    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, %5(<2 x s32>)
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...