[llvm] 5c26be2 - [AArch64][GlobalISel] Lower G_BUILD_VECTOR -> G_DUP
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 8 13:01:43 PST 2021
Author: Jessica Paquette
Date: 2021-03-08T13:01:10-08:00
New Revision: 5c26be214d9fd9bd544aab42f51898b447524edc
URL: https://github.com/llvm/llvm-project/commit/5c26be214d9fd9bd544aab42f51898b447524edc
DIFF: https://github.com/llvm/llvm-project/commit/5c26be214d9fd9bd544aab42f51898b447524edc.diff
LOG: [AArch64][GlobalISel] Lower G_BUILD_VECTOR -> G_DUP
If we have
```
%vec = G_BUILD_VECTOR %reg, %reg, ..., %reg
```
Then lower it to
```
%vec = G_DUP %reg
```
Also update the selector to handle constant splats on G_DUP.
The combine is skipped when the splat is all zeros or all ones, since
TableGen-imported patterns (those using immAllZerosV/immAllOnesV) rely on
seeing a G_BUILD_VECTOR.
This gives minor code size improvements on CTMark at -Os.
Also add some utility functions that make it easier to recognize splats,
and an AArch64-specific splat helper.
Differential Revision: https://reviews.llvm.org/D97731
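(Not part of the patch, but for orientation: the sketch below shows how the
new getVectorSplat()/RegOrConstant utilities added in this commit are meant
to be consumed. The function name is hypothetical; its body mirrors the
shape of matchBuildVectorToDup further down in the diff.)
```
// Hedged sketch, not from the patch: MI is assumed to be a G_BUILD_VECTOR.
// getVectorSplat() and RegOrConstant are the utilities this commit adds to
// llvm/include/llvm/CodeGen/GlobalISel/Utils.h.
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

static bool isLowerableSplat(const MachineInstr &MI,
                             const MachineRegisterInfo &MRI) {
  Optional<RegOrConstant> Splat = getVectorSplat(MI, MRI);
  if (!Splat)
    return false; // Not a splat at all.
  if (Splat->isReg())
    return true;  // Every lane is the register Splat->getReg().
  // Every lane is the constant Splat->getCst(); leave all-zeros/all-ones
  // splats alone so imported immAllZerosV/immAllOnesV patterns still fire.
  int64_t Cst = Splat->getCst();
  return Cst != 0 && Cst != -1;
}
```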
Added:
llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/lib/Target/AArch64/CMakeLists.txt
llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 5a299b87589a..17826cc8d6cb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -260,6 +260,31 @@ LLT getLCMType(LLT OrigTy, LLT TargetTy);
LLVM_READNONE
LLT getGCDType(LLT OrigTy, LLT TargetTy);
+/// Represents a value which can be a Register or a constant.
+///
+/// This is useful in situations where an instruction may have an interesting
+/// register operand or interesting constant operand. For a concrete example,
+/// \see getVectorSplat.
+class RegOrConstant {
+ int64_t Cst;
+ Register Reg;
+ bool IsReg;
+
+public:
+ explicit RegOrConstant(Register Reg) : Reg(Reg), IsReg(true) {}
+ explicit RegOrConstant(int64_t Cst) : Cst(Cst), IsReg(false) {}
+ bool isReg() const { return IsReg; }
+ bool isCst() const { return !IsReg; }
+ Register getReg() const {
+ assert(isReg() && "Expected a register!");
+ return Reg;
+ }
+ int64_t getCst() const {
+ assert(isCst() && "Expected a constant!");
+ return Cst;
+ }
+};
+
/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
/// If \p MI is not a splat, returns None.
Optional<int> getSplatIndex(MachineInstr &MI);
@@ -278,6 +303,28 @@ bool isBuildVectorAllZeros(const MachineInstr &MI,
bool isBuildVectorAllOnes(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
+/// \returns a value when \p MI is a vector splat. The splat can be either a
+/// Register or a constant.
+///
+/// Examples:
+///
+/// \code
+/// %reg = COPY $physreg
+/// %reg_splat = G_BUILD_VECTOR %reg, %reg, ..., %reg
+/// \endcode
+///
+/// If called on the G_BUILD_VECTOR above, this will return a RegOrConstant
+/// containing %reg.
+///
+/// \code
+/// %cst = G_CONSTANT iN 4
+/// %constant_splat = G_BUILD_VECTOR %cst, %cst, ..., %cst
+/// \endcode
+///
+/// In the above case, this will return a RegOrConstant containing 4.
+Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
/// Returns true if given the TargetLowering's boolean contents information,
/// the value \p Val contains a true value.
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index a44a1a7c945c..c24ebcf38c5f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -828,6 +828,20 @@ bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
return isBuildVectorConstantSplat(MI, MRI, -1);
}
+Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ unsigned Opc = MI.getOpcode();
+ if (!isBuildVectorOp(Opc))
+ return None;
+ if (auto Splat = getBuildVectorConstantSplat(MI, MRI))
+ return RegOrConstant(*Splat);
+ auto Reg = MI.getOperand(1).getReg();
+ if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
+ [&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; }))
+ return None;
+ return RegOrConstant(Reg);
+}
+
bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
bool IsFP) {
switch (TLI.getBooleanContents(IsVector, IsFP)) {
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index d7bcac709949..144e6b747f51 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -135,13 +135,22 @@ def mul_const : GICombineRule<
(apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
>;
+def build_vector_to_dup : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_BUILD_VECTOR):$root,
+ [{ return matchBuildVectorToDup(*${root}, MRI); }]),
+ (apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }])
+>;
+
+def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
def AArch64PostLegalizerLoweringHelper
: GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
[shuffle_vector_lowering, vashr_vlshr_imm,
- icmp_lowering]> {
+ icmp_lowering, build_vector_lowering]> {
let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
}
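(Since build_vector_to_dup is registered under the group's DisableRuleOption,
it should be possible to switch the new lowering off for debugging with
`-aarch64postlegalizerlowering-disable-rule=build_vector_to_dup`, assuming the
usual GlobalISel combiner rule-disabling syntax.)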
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index d9fd81c22ca9..4bf8da0380a8 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -29,6 +29,7 @@ add_public_tablegen_target(AArch64CommonTableGen)
add_llvm_target(AArch64CodeGen
GISel/AArch64CallLowering.cpp
+ GISel/AArch64GlobalISelUtils.cpp
GISel/AArch64InstructionSelector.cpp
GISel/AArch64LegalizerInfo.cpp
GISel/AArch64PreLegalizerCombiner.cpp
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
new file mode 100644
index 000000000000..7c543028af9f
--- /dev/null
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -0,0 +1,38 @@
+//===- AArch64GlobalISelUtils.cpp --------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file Implementations of AArch64-specific helper functions used in the
+/// GlobalISel pipeline.
+//===----------------------------------------------------------------------===//
+#include "AArch64GlobalISelUtils.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+Optional<RegOrConstant>
+AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (auto Splat = getVectorSplat(MI, MRI))
+ return Splat;
+ if (MI.getOpcode() != AArch64::G_DUP)
+ return None;
+ Register Src = MI.getOperand(1).getReg();
+ if (auto ValAndVReg =
+ getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI))
+ return RegOrConstant(ValAndVReg->Value.getSExtValue());
+ return RegOrConstant(Src);
+}
+
+Optional<int64_t> AArch64GISelUtils::getAArch64VectorSplatScalar(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI) {
+ auto Splat = getAArch64VectorSplat(MI, MRI);
+ if (!Splat || Splat->isReg())
+ return None;
+ return Splat->getCst();
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
index bed1136c7a67..b1e575d4e4d6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
@@ -12,6 +12,9 @@
#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/Register.h"
#include <cstdint>
namespace llvm {
@@ -23,6 +26,16 @@ constexpr bool isLegalArithImmed(const uint64_t C) {
return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}
+/// \returns A value when \p MI is a vector splat of a Register or constant.
+/// Checks for generic opcodes and AArch64-specific generic opcodes.
+Optional<RegOrConstant> getAArch64VectorSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// \returns A value when \p MI is a constant vector splat.
+/// Checks for generic opcodes and AArch64-specific generic opcodes.
+Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
} // namespace AArch64GISelUtils
} // namespace llvm
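(Both helpers are put to work later in this patch:
getAArch64VectorSplatScalar replaces the hand-rolled splat-matching loop in
getVectorShiftImm in AArch64InstructionSelector.cpp and the plain
getBuildVectorConstantSplat call in isVShiftRImm in
AArch64PostLegalizerLowering.cpp, letting both also match G_DUP.)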
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3134649d5678..7329c6370af0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -11,6 +11,7 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
+#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
@@ -24,7 +25,6 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -46,6 +47,7 @@
using namespace llvm;
using namespace MIPatternMatch;
+using namespace AArch64GISelUtils;
namespace llvm {
class BlockFrequencyInfo;
@@ -145,6 +147,16 @@ class AArch64InstructionSelector : public InstructionSelector {
Register EltReg, unsigned LaneIdx,
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit a sequence of instructions representing a constant \p CV for a
+ /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
+ ///
+ /// \returns the last instruction in the sequence on success, and nullptr
+ /// otherwise.
+ MachineInstr *emitConstantVector(Register Dst, Constant *CV,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) const;
+
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI) const;
@@ -1659,23 +1671,7 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
MachineInstr *OpMI = MRI.getVRegDef(Reg);
assert(OpMI && "Expected to find a vreg def for vector shift operand");
- if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
- return None;
-
- // Check all operands are identical immediates.
- int64_t ImmVal = 0;
- for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
- auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
- if (!VRegAndVal)
- return None;
-
- if (Idx == 1)
- ImmVal = VRegAndVal->Value.getSExtValue();
- if (ImmVal != VRegAndVal->Value.getSExtValue())
- return None;
- }
-
- return ImmVal;
+ return getAArch64VectorSplatScalar(*OpMI, MRI);
}
/// Matches and returns the shift immediate value for a SHL instruction given
@@ -1963,7 +1959,7 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
MRI.setType(I.getOperand(0).getReg(),
DstTy.changeElementType(LLT::scalar(64)));
- MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
I.getOperand(1).setReg(NewSrc.getReg(0));
return true;
}
@@ -2125,6 +2121,25 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
+ case AArch64::G_DUP: {
+ // Before selecting a DUP instruction, check if it is better selected as a
+ // MOV or load from a constant pool.
+ Register Src = I.getOperand(1).getReg();
+ auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
+ if (!ValAndVReg)
+ return false;
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ Register Dst = I.getOperand(0).getReg();
+ auto *CV = ConstantDataVector::getSplat(
+ MRI.getType(Dst).getNumElements(),
+ ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
+ ValAndVReg->Value));
+ MachineIRBuilder MIRBuilder(I);
+ if (!emitConstantVector(Dst, CV, MIRBuilder, MRI))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
case TargetOpcode::G_BR: {
// If the branch jumps to the fallthrough block, don't bother emitting it.
// Only do this for -O0 for a good code size improvement, because when
@@ -4811,6 +4826,44 @@ bool AArch64InstructionSelector::selectInsertElt(
return true;
}
+MachineInstr *
+AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) const {
+ LLT DstTy = MRI.getType(Dst);
+ unsigned DstSize = DstTy.getSizeInBits();
+ if (CV->isNullValue()) {
+ if (DstSize == 128) {
+ auto Mov =
+ MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+ }
+
+ if (DstSize == 64) {
+ auto Mov =
+ MIRBuilder
+ .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
+ .addImm(0);
+ auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
+ .addReg(Mov.getReg(0), 0, AArch64::dsub);
+ RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
+ return &*Copy;
+ }
+ }
+
+ auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
+ if (!CPLoad) {
+ LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
+ return nullptr;
+ }
+
+ auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
+ RBI.constrainGenericRegister(
+ Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
+ return &*Copy;
+}
+
bool AArch64InstructionSelector::tryOptConstantBuildVec(
MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@@ -4837,33 +4890,8 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
}
Constant *CV = ConstantVector::get(Csts);
MachineIRBuilder MIB(I);
- if (CV->isNullValue()) {
- // Until the importer can support immAllZerosV in pattern leaf nodes,
- // select a zero move manually here.
- Register DstReg = I.getOperand(0).getReg();
- if (DstSize == 128) {
- auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- } else if (DstSize == 64) {
- auto Mov =
- MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
- .addImm(0);
- MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(Mov.getReg(0), 0, AArch64::dsub);
- I.eraseFromParent();
- return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
- }
- }
- auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
- if (!CPLoad) {
- LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
+ if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
return false;
- }
- MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
- RBI.constrainGenericRegister(I.getOperand(0).getReg(),
- *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
- MRI);
I.eraseFromParent();
return true;
}
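(For clarity, the contract of the new emitConstantVector() helper as both
call sites above use it: it returns the last emitted instruction on success,
nullptr on failure, and never erases the instruction being replaced; that is
the caller's job. A minimal hedged sketch, with Dst, CV, MIRBuilder, MRI,
and I assumed in scope:)
```
// Hedged sketch of the call contract, mirroring the two call sites in this
// patch: fall back to ordinary selection when no MOV or constant-pool load
// could be emitted, and erase the original instruction on success.
if (!emitConstantVector(Dst, CV, MIRBuilder, MRI))
  return false;      // Dst keeps its old definition; select normally.
I.eraseFromParent(); // Dst is now defined by the emitted sequence.
return true;
```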
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 3e059832f0a6..bde7df1bea7a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -471,7 +471,7 @@ static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
int64_t &Cnt) {
assert(Ty.isVector() && "vector shift count is not a vector type");
MachineInstr *MI = MRI.getVRegDef(Reg);
- auto Cst = getBuildVectorConstantSplat(*MI, MRI);
+ auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
if (!Cst)
return false;
Cnt = *Cst;
@@ -696,6 +696,29 @@ bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
+static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ auto Splat = getAArch64VectorSplat(MI, MRI);
+ if (!Splat)
+ return false;
+ if (Splat->isReg())
+ return true;
+ // Later, during selection, we'll try to match imported patterns using
+ // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
+ // G_BUILD_VECTORs which could match those patterns.
+ int64_t Cst = Splat->getCst();
+ return (Cst != 0 && Cst != -1);
+}
+
+static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(MI);
+ B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
+ {MI.getOperand(1).getReg()});
+ MI.eraseFromParent();
+ return true;
+}
+
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
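(Note that applyBuildVectorToDup builds the G_DUP directly onto the
G_BUILD_VECTOR's destination register, so none of the vector's users need
rewriting; the old G_BUILD_VECTOR is simply erased.)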
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
new file mode 100644
index 000000000000..db17c66bde96
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
@@ -0,0 +1,181 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LOWER
+# RUN: llc -mtriple aarch64 -O2 -start-before=aarch64-postlegalizer-lowering -stop-after=instruction-select -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=SELECT
+...
+---
+name: same_reg
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+ ; LOWER-LABEL: name: same_reg
+ ; LOWER: liveins: $d0
+ ; LOWER: %r:_(s8) = G_IMPLICIT_DEF
+ ; LOWER: %build_vector:_(<8 x s8>) = G_DUP %r(s8)
+ ; LOWER: $d0 = COPY %build_vector(<8 x s8>)
+ ; LOWER: RET_ReallyLR implicit $d0
+ ; SELECT-LABEL: name: same_reg
+ ; SELECT: liveins: $d0
+ ; SELECT: %r:gpr32 = IMPLICIT_DEF
+ ; SELECT: %build_vector:fpr64 = DUPv8i8gpr %r
+ ; SELECT: $d0 = COPY %build_vector
+ ; SELECT: RET_ReallyLR implicit $d0
+ %r:_(s8) = G_IMPLICIT_DEF
+ %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
+ $d0 = COPY %build_vector(<8 x s8>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: dont_combine_different_reg
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $w0, $w1
+ ; LOWER-LABEL: name: dont_combine_different_reg
+ ; LOWER: liveins: $d0, $w0, $w1
+ ; LOWER: %r:_(s32) = COPY $w0
+ ; LOWER: %q:_(s32) = COPY $w1
+ ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
+ ; LOWER: $d0 = COPY %build_vector(<2 x s32>)
+ ; LOWER: RET_ReallyLR implicit $d0
+ ; SELECT-LABEL: name: dont_combine_different_reg
+ ; SELECT: liveins: $d0, $w0, $w1
+ ; SELECT: %r:gpr32all = COPY $w0
+ ; SELECT: %q:gpr32 = COPY $w1
+ ; SELECT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; SELECT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
+ ; SELECT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %q
+ ; SELECT: %build_vector:fpr64 = COPY [[INSvi32gpr]].dsub
+ ; SELECT: $d0 = COPY %build_vector
+ ; SELECT: RET_ReallyLR implicit $d0
+ %r:_(s32) = COPY $w0
+ %q:_(s32) = COPY $w1
+ %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r, %q
+ $d0 = COPY %build_vector(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: dont_combine_zero
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+ ; Don't combine with 0. We want to avoid blocking immAllZerosV selection
+ ; patterns.
+
+ ; LOWER-LABEL: name: dont_combine_zero
+ ; LOWER: liveins: $d0
+ ; LOWER: %r:_(s8) = G_CONSTANT i8 0
+ ; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
+ ; LOWER: $d0 = COPY %build_vector(<8 x s8>)
+ ; LOWER: RET_ReallyLR implicit $d0
+ ; SELECT-LABEL: name: dont_combine_zero
+ ; SELECT: liveins: $d0
+ ; SELECT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+ ; SELECT: %build_vector:fpr64 = COPY [[MOVIv2d_ns]].dsub
+ ; SELECT: $d0 = COPY %build_vector
+ ; SELECT: RET_ReallyLR implicit $d0
+ %r:_(s8) = G_CONSTANT i8 0
+ %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
+ $d0 = COPY %build_vector(<8 x s8>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: dont_combine_all_ones
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+ ; Don't combine with -1. We want to avoid blocking immAllOnesV selection
+ ; patterns.
+
+ ; LOWER-LABEL: name: dont_combine_all_ones
+ ; LOWER: liveins: $d0
+ ; LOWER: %r:_(s8) = G_CONSTANT i8 -1
+ ; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
+ ; LOWER: $d0 = COPY %build_vector(<8 x s8>)
+ ; LOWER: RET_ReallyLR implicit $d0
+ ; SELECT-LABEL: name: dont_combine_all_ones
+ ; SELECT: liveins: $d0
+ ; SELECT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+ ; SELECT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+ ; SELECT: $d0 = COPY [[LDRDui]]
+ ; SELECT: RET_ReallyLR implicit $d0
+ %r:_(s8) = G_CONSTANT i8 -1
+ %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
+ $d0 = COPY %build_vector(<8 x s8>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: all_zeros_pat_example
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+ ; We should get a NEGv2i32 here.
+
+ ; LOWER-LABEL: name: all_zeros_pat_example
+ ; LOWER: liveins: $d0
+ ; LOWER: %v:_(<2 x s32>) = COPY $d0
+ ; LOWER: %cst:_(s32) = G_CONSTANT i32 0
+ ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
+ ; LOWER: %sub:_(<2 x s32>) = G_SUB %build_vector, %v
+ ; LOWER: $d0 = COPY %sub(<2 x s32>)
+ ; LOWER: RET_ReallyLR implicit $d0
+ ; SELECT-LABEL: name: all_zeros_pat_example
+ ; SELECT: liveins: $d0
+ ; SELECT: %v:fpr64 = COPY $d0
+ ; SELECT: %sub:fpr64 = NEGv2i32 %v
+ ; SELECT: $d0 = COPY %sub
+ ; SELECT: RET_ReallyLR implicit $d0
+ %v:_(<2 x s32>) = COPY $d0
+ %cst:_(s32) = G_CONSTANT i32 0
+ %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
+ %sub:_(<2 x s32>) = G_SUB %build_vector, %v
+ $d0 = COPY %sub(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: all_ones_pat_example
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1
+ ; We should get a BICv8i8 here.
+
+ ; LOWER-LABEL: name: all_ones_pat_example
+ ; LOWER: liveins: $d0, $d1
+ ; LOWER: %v0:_(<2 x s32>) = COPY $d0
+ ; LOWER: %v1:_(<2 x s32>) = COPY $d1
+ ; LOWER: %cst:_(s32) = G_CONSTANT i32 -1
+ ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
+ ; LOWER: %xor:_(<2 x s32>) = G_XOR %v0, %build_vector
+ ; LOWER: %and:_(<2 x s32>) = G_AND %v1, %xor
+ ; LOWER: $d0 = COPY %and(<2 x s32>)
+ ; LOWER: RET_ReallyLR implicit $d0
+ ; SELECT-LABEL: name: all_ones_pat_example
+ ; SELECT: liveins: $d0, $d1
+ ; SELECT: %v0:fpr64 = COPY $d0
+ ; SELECT: %v1:fpr64 = COPY $d1
+ ; SELECT: %and:fpr64 = BICv8i8 %v1, %v0
+ ; SELECT: $d0 = COPY %and
+ ; SELECT: RET_ReallyLR implicit $d0
+ %v0:_(<2 x s32>) = COPY $d0
+ %v1:_(<2 x s32>) = COPY $d1
+ %cst:_(s32) = G_CONSTANT i32 -1
+ %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
+ %xor:_(<2 x s32>) = G_XOR %v0, %build_vector
+ %and:_(<2 x s32>) = G_AND %v1, %xor
+ $d0 = COPY %and(<2 x s32>)
+ RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
index 32170b48c83f..a38086ef235c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
@@ -84,8 +84,8 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[C]](s32)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[DUP]](<4 x s32>)
; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index cc2a2fdf4c69..9dc0ba68fe04 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -379,3 +379,61 @@ body: |
RET_ReallyLR implicit $q0
...
+---
+name: cst_v4s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $w0
+ ; CHECK-LABEL: name: cst_v4s32
+ ; CHECK: liveins: $w0
+ ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+ ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+ ; CHECK: $q0 = COPY [[LDRQui]]
+ ; CHECK: RET_ReallyLR implicit $q0
+ %cst:gpr(s32) = G_CONSTANT i32 3
+ %dup:fpr(<4 x s32>) = G_DUP %cst(s32)
+ $q0 = COPY %dup(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: cst_v8s8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $w0
+ ; CHECK-LABEL: name: cst_v8s8
+ ; CHECK: liveins: $w0
+ ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+ ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+ ; CHECK: $d0 = COPY [[LDRDui]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %cst:gpr(s8) = G_CONSTANT i8 3
+ %dup:fpr(<8 x s8>) = G_DUP %cst(s8)
+ $d0 = COPY %dup(<8 x s8>)
+ RET_ReallyLR implicit $d0
+...
+---
+name: cst_v2p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $w0
+ ; CHECK-LABEL: name: cst_v2p0
+ ; CHECK: liveins: $w0
+ ; CHECK: %cst:gpr64 = MOVi64imm 3
+ ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+ ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+ ; CHECK: $q0 = COPY [[LDRQui]]
+ ; CHECK: RET_ReallyLR implicit $q0
+ %cst:gpr(p0) = G_CONSTANT i64 3
+ %dup:fpr(<2 x p0>) = G_DUP %cst(p0)
+ $q0 = COPY %dup(<2 x p0>)
+ RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
index 6a5c33ed9c14..b00b6ecf037e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
@@ -572,3 +572,38 @@ body: |
$q0 = COPY %2(<16 x s8>)
RET_ReallyLR implicit $q0
...
+---
+name: shl_v2i32_imm_dup
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: gpr }
+ - { id: 3, class: fpr }
+liveins:
+ - { reg: '$d0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $d0
+
+ ; Should still be able to select immediate forms using a G_DUP from a
+ ; constant.
+
+ ; CHECK-LABEL: name: shl_v2i32_imm_dup
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 24
+ ; CHECK: $d0 = COPY [[SHLv2i32_shift]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(<2 x s32>) = COPY $d0
+ %2:gpr(s32) = G_CONSTANT i32 24
+ %1:fpr(<2 x s32>) = G_DUP %2(s32)
+ %3:fpr(<2 x s32>) = G_SHL %0, %1(<2 x s32>)
+ $d0 = COPY %3(<2 x s32>)
+ RET_ReallyLR implicit $d0