[llvm] [GlobalISel][AArch64] Combine Vector Reduction Add Long (PR #76241)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 22 06:09:11 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-backend-aarch64
Author: None (chuongg3)
<details>
<summary>Changes</summary>
The first commit `[AArch64][GlobalISel] Pre-Commit for Combine ADDLV(ADDLP)` is going through review as well (Pull Request https://github.com/llvm/llvm-project/pull/75832).
ADDLV(ADDLP) => ADDLV
Removes the unnecessary ADDLP instruction.
ADDV(ADDLP) => ADDLV
This combine already exists for SelectionDAG; this patch adds it for GlobalISel.
---
Patch is 449.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76241.diff
20 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h (-16)
- (modified) llvm/include/llvm/CodeGen/GlobalISel/Utils.h (+19)
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+32-118)
- (modified) llvm/lib/CodeGen/GlobalISel/Utils.cpp (+143)
- (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+10-1)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrGISel.td (+30)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+40-8)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+52)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp (+186-41)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir (+68-48)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir (+6-6)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir (+29-4)
- (modified) llvm/test/CodeGen/AArch64/arm64-neon-across.ll (+22-10)
- (modified) llvm/test/CodeGen/AArch64/arm64-vadd.ll (+689-194)
- (modified) llvm/test/CodeGen/AArch64/dp1.ll (+11-22)
- (modified) llvm/test/CodeGen/AArch64/neon-addlv.ll (+42-17)
- (modified) llvm/test/CodeGen/AArch64/popcount.ll (+11-15)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+2964-4208)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir (+16-20)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 711ba10247c34d..c6e8777300efa7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -187,22 +187,6 @@ class LegalizerHelper {
LegalizeResult widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy);
- /// Helper function to split a wide generic register into bitwise blocks with
- /// the given Type (which implies the number of blocks needed). The generic
- /// registers created are appended to Ops, starting at bit 0 of Reg.
- void extractParts(Register Reg, LLT Ty, int NumParts,
- SmallVectorImpl<Register> &VRegs);
-
- /// Version which handles irregular splits.
- bool extractParts(Register Reg, LLT RegTy, LLT MainTy,
- LLT &LeftoverTy,
- SmallVectorImpl<Register> &VRegs,
- SmallVectorImpl<Register> &LeftoverVRegs);
-
- /// Version which handles irregular sub-vector splits.
- void extractVectorParts(Register Reg, unsigned NumElst,
- SmallVectorImpl<Register> &VRegs);
-
/// Helper function to build a wide generic register \p DstReg of type \p
/// RegTy from smaller parts. This will produce a G_MERGE_VALUES,
/// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index ffb6e53a0363f9..617953dda9e90e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -33,6 +33,7 @@ class BlockFrequencyInfo;
class GISelKnownBits;
class MachineFunction;
class MachineInstr;
+class MachineIRBuilder;
class MachineOperand;
class MachineOptimizationRemarkEmitter;
class MachineOptimizationRemarkMissed;
@@ -247,6 +248,24 @@ MachineInstr *getDefIgnoringCopies(Register Reg,
/// Also walks through hints such as G_ASSERT_ZEXT.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI);
+/// Helper function to split a wide generic register into bitwise blocks with
+/// the given Type (which implies the number of blocks needed). The generic
+/// registers created are appended to Ops, starting at bit 0 of Reg.
+void extractParts(Register Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs,
+ MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI);
+
+/// Version which handles irregular splits.
+bool extractParts(Register Reg, LLT RegTy, LLT MainTy, LLT &LeftoverTy,
+ SmallVectorImpl<Register> &VRegs,
+ SmallVectorImpl<Register> &LeftoverVRegs,
+ MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI);
+
+/// Version which handles irregular sub-vector splits.
+void extractVectorParts(Register Reg, unsigned NumElst,
+ SmallVectorImpl<Register> &VRegs,
+ MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI);
+
// Templated variant of getOpcodeDef returning a MachineInstr derived T.
/// See if Reg is defined by an single def instruction of type T
/// Also try to do trivial folding if it's a COPY with
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 37e7153be5720e..24374d1387726f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -156,100 +156,6 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
}
}
-void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
- SmallVectorImpl<Register> &VRegs) {
- for (int i = 0; i < NumParts; ++i)
- VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- MIRBuilder.buildUnmerge(VRegs, Reg);
-}
-
-bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
- LLT MainTy, LLT &LeftoverTy,
- SmallVectorImpl<Register> &VRegs,
- SmallVectorImpl<Register> &LeftoverRegs) {
- assert(!LeftoverTy.isValid() && "this is an out argument");
-
- unsigned RegSize = RegTy.getSizeInBits();
- unsigned MainSize = MainTy.getSizeInBits();
- unsigned NumParts = RegSize / MainSize;
- unsigned LeftoverSize = RegSize - NumParts * MainSize;
-
- // Use an unmerge when possible.
- if (LeftoverSize == 0) {
- for (unsigned I = 0; I < NumParts; ++I)
- VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
- MIRBuilder.buildUnmerge(VRegs, Reg);
- return true;
- }
-
- // Perform irregular split. Leftover is last element of RegPieces.
- if (MainTy.isVector()) {
- SmallVector<Register, 8> RegPieces;
- extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
- for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
- VRegs.push_back(RegPieces[i]);
- LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
- LeftoverTy = MRI.getType(LeftoverRegs[0]);
- return true;
- }
-
- LeftoverTy = LLT::scalar(LeftoverSize);
- // For irregular sizes, extract the individual parts.
- for (unsigned I = 0; I != NumParts; ++I) {
- Register NewReg = MRI.createGenericVirtualRegister(MainTy);
- VRegs.push_back(NewReg);
- MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
- }
-
- for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
- Offset += LeftoverSize) {
- Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
- LeftoverRegs.push_back(NewReg);
- MIRBuilder.buildExtract(NewReg, Reg, Offset);
- }
-
- return true;
-}
-
-void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
- SmallVectorImpl<Register> &VRegs) {
- LLT RegTy = MRI.getType(Reg);
- assert(RegTy.isVector() && "Expected a vector type");
-
- LLT EltTy = RegTy.getElementType();
- LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
- unsigned RegNumElts = RegTy.getNumElements();
- unsigned LeftoverNumElts = RegNumElts % NumElts;
- unsigned NumNarrowTyPieces = RegNumElts / NumElts;
-
- // Perfect split without leftover
- if (LeftoverNumElts == 0)
- return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
-
- // Irregular split. Provide direct access to all elements for artifact
- // combiner using unmerge to elements. Then build vectors with NumElts
- // elements. Remaining element(s) will be (used to build vector) Leftover.
- SmallVector<Register, 8> Elts;
- extractParts(Reg, EltTy, RegNumElts, Elts);
-
- unsigned Offset = 0;
- // Requested sub-vectors of NarrowTy.
- for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
- ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
- VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
- }
-
- // Leftover element(s).
- if (LeftoverNumElts == 1) {
- VRegs.push_back(Elts[Offset]);
- } else {
- LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
- ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
- VRegs.push_back(
- MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
- }
-}
-
void LegalizerHelper::insertParts(Register DstReg,
LLT ResultTy, LLT PartTy,
ArrayRef<Register> PartRegs,
@@ -291,7 +197,8 @@ void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
Register Reg) {
LLT Ty = MRI.getType(Reg);
SmallVector<Register, 8> RegElts;
- extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
+ extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
+ MIRBuilder, MRI);
Elts.append(RegElts);
}
@@ -1330,7 +1237,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
- SrcRegs[i / 2]);
+ SrcRegs[i / 2], MIRBuilder, MRI);
}
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, MI);
@@ -1372,13 +1279,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
- LHSLeftoverRegs))
+ LHSLeftoverRegs, MIRBuilder, MRI))
return UnableToLegalize;
LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
- RHSPartRegs, RHSLeftoverRegs))
+ RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
return UnableToLegalize;
// We now have the LHS and RHS of the compare split into narrow-type
@@ -1532,7 +1439,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changingInstr(MI);
SmallVector<Register, 2> SrcRegs, DstRegs;
unsigned NumParts = SizeOp0 / NarrowSize;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
+ MIRBuilder, MRI);
for (unsigned i = 0; i < NumParts; ++i) {
auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
@@ -3981,7 +3889,8 @@ LegalizerHelper::fewerElementsVectorMultiEltType(
MI.getOperand(UseIdx));
} else {
SmallVector<Register, 8> SplitPieces;
- extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
+ extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
+ MRI);
for (auto Reg : SplitPieces)
InputOpsPieces[UseNo].push_back(Reg);
}
@@ -4037,7 +3946,8 @@ LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
UseIdx += 2, ++UseNo) {
MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
- extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
+ extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
+ MIRBuilder, MRI);
}
// Build PHIs with fewer elements.
@@ -4306,7 +4216,7 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
} else {
if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
- NarrowLeftoverRegs)) {
+ NarrowLeftoverRegs, MIRBuilder, MRI)) {
NumParts = NarrowRegs.size();
NumLeftover = NarrowLeftoverRegs.size();
}
@@ -4547,8 +4457,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
unsigned NewElts = NarrowTy.getNumElements();
SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
- extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
- extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
+ extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
+ extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
SplitSrc2Regs[1]};
@@ -4682,7 +4592,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
: SrcTy.getNumElements();
- extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
+ extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
if (NarrowTy.isScalar()) {
if (DstTy != NarrowTy)
return UnableToLegalize; // FIXME: handle implicit extensions.
@@ -4753,7 +4663,8 @@ LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
SmallVector<Register> SplitSrcs;
// Split the sources into NarrowTy size pieces.
extractParts(SrcReg, NarrowTy,
- SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
+ SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
+ MIRBuilder, MRI);
// We're going to do a tree reduction using vector operations until we have
// one NarrowTy size value left.
while (SplitSrcs.size() > 1) {
@@ -5392,8 +5303,10 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
LLT LeftoverTy, DummyTy;
SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
- extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
- extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
+ extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
+ MIRBuilder, MRI);
+ extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
+ MRI);
int NarrowParts = Src1Regs.size();
for (int I = 0, E = Src1Left.size(); I != E; ++I) {
@@ -5451,8 +5364,8 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
SmallVector<Register, 2> Src1Parts, Src2Parts;
SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
- extractParts(Src1, NarrowTy, NumParts, Src1Parts);
- extractParts(Src2, NarrowTy, NumParts, Src2Parts);
+ extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
+ extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
// Take only high half of registers if this is high mul.
@@ -5504,7 +5417,8 @@ LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
SmallVector<Register, 2> SrcRegs, DstRegs;
SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
+ MIRBuilder, MRI);
Register OpReg = MI.getOperand(0).getReg();
uint64_t OpStart = MI.getOperand(2).getImm();
@@ -5566,7 +5480,7 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
LLT LeftoverTy;
extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
- LeftoverRegs);
+ LeftoverRegs, MIRBuilder, MRI);
for (Register Reg : LeftoverRegs)
SrcRegs.push_back(Reg);
@@ -5651,12 +5565,12 @@ LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
LLT LeftoverTy;
if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
- Src0Regs, Src0LeftoverRegs))
+ Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
return UnableToLegalize;
LLT Unused;
if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
- Src1Regs, Src1LeftoverRegs))
+ Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
llvm_unreachable("inconsistent extractParts result");
for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
@@ -5719,12 +5633,12 @@ LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
LLT LeftoverTy;
if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
- Src1Regs, Src1LeftoverRegs))
+ Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
return UnableToLegalize;
LLT Unused;
if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
- Src2Regs, Src2LeftoverRegs))
+ Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
llvm_unreachable("inconsistent extractParts result");
for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
@@ -6220,7 +6134,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
// First, split the source into two smaller vectors.
SmallVector<Register, 2> SplitSrcs;
- extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
+ extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
// Truncate the splits into intermediate narrower elements.
LLT InterTy;
@@ -6960,7 +6874,7 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
int64_t IdxVal;
if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
SmallVector<Register, 8> SrcRegs;
- extractParts(SrcVec, EltTy, NumElts, SrcRegs);
+ extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
if (InsertVal) {
SrcRegs[IdxVal] = MI.getOperand(2).getReg();
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index eaf829f562b2dc..2523fdd732eee2 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -476,6 +477,148 @@ Register llvm::getSrcRegIgnoringCopies(Register Reg,
return DefSrcReg ? DefSrcReg->Reg : Register();
}
+void llvm::extractParts(Register Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) {
+ for (int i = 0; i < NumParts; ++i)
+ VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+bool llvm::extractParts(Register Reg, LLT RegTy, LLT MainTy, LLT &LeftoverTy,
+ SmallVectorImpl<Register> &VRegs,
+ SmallVectorImpl<Register> &LeftoverRegs,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) {
+ assert(!LeftoverTy.isValid() && "this is an out argument");
+
+ unsigned RegSize = RegTy.getSizeInBits();
+ unsigned MainSize = MainTy.getSizeInBits();
+ unsigned NumParts = RegSize / MainSize;
+ unsigned LeftoverSize = RegSize - NumParts * MainSize;
+
+ // Use an unmerge when possible.
+ if (LeftoverSize == 0) {
+ for (unsigned I = 0; I < NumParts; ++I)
+ VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+ return true;
+ }
+
+ // Try to use unmerge for irregular vector split where possible
+ if (RegTy.isVector() && MainTy.isVector()) {
+ unsigned RegNumElts = RegTy.getNumElements();
+ unsigned MainNumElts = MainTy.getNumElements();
+ unsigned LeftoverNumElts = RegNumElts % MainNumElts;
+ // If can unmerge to LeftoverTy, do it
+ if (MainNumElts % LeftoverNumElts == 0 &&
+ RegNumElts % LeftoverNumElts == 0 &&
+ RegTy.getScalarSizeInBits() == MainTy.getScalarSizeInBits() &&
+ LeftoverNumElts > 1) {
+ LeftoverTy =
+ LLT::fixed_vector(LeftoverNumElts, RegTy.getScalarSizeInBits());
+
+ // Unmerge the SrcReg to LeftoverTy vectors
+ SmallVector<Register, 4> UnmergeValues;
+ extractParts(Reg, LeftoverTy, RegNumElts / LeftoverNumElts, UnmergeValues,
+ MIRBuilder, MRI);
+
+ // Find how many LeftoverTy makes one MainTy
+ unsigned LeftoverPerMain = MainNumElts / LeftoverNumElts;
+ unsigned NumOfLeftoverVal =
+ ((RegNumElts % MainNumElts) / LeftoverNumElts);
+
+ // Create as many MainTy as possible using unmerged value
+ SmallVector<Register, 4> MergeValues;
+ for (unsigned I = 0; I < UnmergeValues.size() - NumOfLeftoverVal; I++) {...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/76241
More information about the llvm-commits
mailing list