[llvm] [GlobalISel] Refactor extractParts() (PR #75223)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 07:55:20 PST 2023
https://github.com/chuongg3 updated https://github.com/llvm/llvm-project/pull/75223
>From 84c4eb6c1bd036427237f6d9fde96aa5701f7ea3 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Tue, 12 Dec 2023 17:14:10 +0000
Subject: [PATCH 1/3] [GlobalISel] Pre-commit for Refactor extractParts()
---
.../GlobalISel/legalize-shuffle-vector.mir | 23 +++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 864275664882c..b1d0ddd098150 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -316,6 +316,29 @@ body: |
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]], shufflemask(0, 1, 5, 6)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s32>), [[UV11:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV3]](<2 x s32>)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV4]](<2 x s32>), [[UV7]](<2 x s32>)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS1]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV8]](<2 x s32>), [[UV11]](<2 x s32>)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS2]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV12]](<2 x s32>), [[UV15]](<2 x s32>)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS3]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR2]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
>From 387106f4121d9a7306ce5670823adb15e2f62f37 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Tue, 12 Dec 2023 17:15:27 +0000
Subject: [PATCH 2/3] [GlobalISel] Refactor extractParts()
Moved extractParts() and extractVectorParts() from LegalizerHelper
to Utils so that they can be used in different passes.
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 16 --
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 19 +++
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 150 ++++--------------
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 102 ++++++++++++
.../GISel/AArch64PreLegalizerCombiner.cpp | 1 +
5 files changed, 154 insertions(+), 134 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 711ba10247c34..c6e8777300efa 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -187,22 +187,6 @@ class LegalizerHelper {
LegalizeResult widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy);
- /// Helper function to split a wide generic register into bitwise blocks with
- /// the given Type (which implies the number of blocks needed). The generic
- /// registers created are appended to Ops, starting at bit 0 of Reg.
- void extractParts(Register Reg, LLT Ty, int NumParts,
- SmallVectorImpl<Register> &VRegs);
-
- /// Version which handles irregular splits.
- bool extractParts(Register Reg, LLT RegTy, LLT MainTy,
- LLT &LeftoverTy,
- SmallVectorImpl<Register> &VRegs,
- SmallVectorImpl<Register> &LeftoverVRegs);
-
- /// Version which handles irregular sub-vector splits.
- void extractVectorParts(Register Reg, unsigned NumElst,
- SmallVectorImpl<Register> &VRegs);
-
/// Helper function to build a wide generic register \p DstReg of type \p
/// RegTy from smaller parts. This will produce a G_MERGE_VALUES,
/// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index ffb6e53a0363f..617953dda9e90 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -33,6 +33,7 @@ class BlockFrequencyInfo;
class GISelKnownBits;
class MachineFunction;
class MachineInstr;
+class MachineIRBuilder;
class MachineOperand;
class MachineOptimizationRemarkEmitter;
class MachineOptimizationRemarkMissed;
@@ -247,6 +248,24 @@ MachineInstr *getDefIgnoringCopies(Register Reg,
/// Also walks through hints such as G_ASSERT_ZEXT.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI);
+/// Helper function to split a wide generic register into bitwise blocks with
+/// the given Type (which implies the number of blocks needed). The generic
+/// registers created are appended to VRegs, starting at bit 0 of Reg.
+void extractParts(Register Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs,
+ MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI);
+
+/// Version which handles irregular splits.
+bool extractParts(Register Reg, LLT RegTy, LLT MainTy, LLT &LeftoverTy,
+ SmallVectorImpl<Register> &VRegs,
+ SmallVectorImpl<Register> &LeftoverVRegs,
+ MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI);
+
+/// Version which handles irregular sub-vector splits.
+void extractVectorParts(Register Reg, unsigned NumElst,
+ SmallVectorImpl<Register> &VRegs,
+ MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI);
+
// Templated variant of getOpcodeDef returning a MachineInstr derived T.
/// See if Reg is defined by an single def instruction of type T
/// Also try to do trivial folding if it's a COPY with
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 37e7153be5720..24374d1387726 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -156,100 +156,6 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
}
}
-void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
- SmallVectorImpl<Register> &VRegs) {
- for (int i = 0; i < NumParts; ++i)
- VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- MIRBuilder.buildUnmerge(VRegs, Reg);
-}
-
-bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
- LLT MainTy, LLT &LeftoverTy,
- SmallVectorImpl<Register> &VRegs,
- SmallVectorImpl<Register> &LeftoverRegs) {
- assert(!LeftoverTy.isValid() && "this is an out argument");
-
- unsigned RegSize = RegTy.getSizeInBits();
- unsigned MainSize = MainTy.getSizeInBits();
- unsigned NumParts = RegSize / MainSize;
- unsigned LeftoverSize = RegSize - NumParts * MainSize;
-
- // Use an unmerge when possible.
- if (LeftoverSize == 0) {
- for (unsigned I = 0; I < NumParts; ++I)
- VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
- MIRBuilder.buildUnmerge(VRegs, Reg);
- return true;
- }
-
- // Perform irregular split. Leftover is last element of RegPieces.
- if (MainTy.isVector()) {
- SmallVector<Register, 8> RegPieces;
- extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
- for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
- VRegs.push_back(RegPieces[i]);
- LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
- LeftoverTy = MRI.getType(LeftoverRegs[0]);
- return true;
- }
-
- LeftoverTy = LLT::scalar(LeftoverSize);
- // For irregular sizes, extract the individual parts.
- for (unsigned I = 0; I != NumParts; ++I) {
- Register NewReg = MRI.createGenericVirtualRegister(MainTy);
- VRegs.push_back(NewReg);
- MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
- }
-
- for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
- Offset += LeftoverSize) {
- Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
- LeftoverRegs.push_back(NewReg);
- MIRBuilder.buildExtract(NewReg, Reg, Offset);
- }
-
- return true;
-}
-
-void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
- SmallVectorImpl<Register> &VRegs) {
- LLT RegTy = MRI.getType(Reg);
- assert(RegTy.isVector() && "Expected a vector type");
-
- LLT EltTy = RegTy.getElementType();
- LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
- unsigned RegNumElts = RegTy.getNumElements();
- unsigned LeftoverNumElts = RegNumElts % NumElts;
- unsigned NumNarrowTyPieces = RegNumElts / NumElts;
-
- // Perfect split without leftover
- if (LeftoverNumElts == 0)
- return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
-
- // Irregular split. Provide direct access to all elements for artifact
- // combiner using unmerge to elements. Then build vectors with NumElts
- // elements. Remaining element(s) will be (used to build vector) Leftover.
- SmallVector<Register, 8> Elts;
- extractParts(Reg, EltTy, RegNumElts, Elts);
-
- unsigned Offset = 0;
- // Requested sub-vectors of NarrowTy.
- for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
- ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
- VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
- }
-
- // Leftover element(s).
- if (LeftoverNumElts == 1) {
- VRegs.push_back(Elts[Offset]);
- } else {
- LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
- ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
- VRegs.push_back(
- MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
- }
-}
-
void LegalizerHelper::insertParts(Register DstReg,
LLT ResultTy, LLT PartTy,
ArrayRef<Register> PartRegs,
@@ -291,7 +197,8 @@ void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
Register Reg) {
LLT Ty = MRI.getType(Reg);
SmallVector<Register, 8> RegElts;
- extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
+ extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
+ MIRBuilder, MRI);
Elts.append(RegElts);
}
@@ -1330,7 +1237,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
- SrcRegs[i / 2]);
+ SrcRegs[i / 2], MIRBuilder, MRI);
}
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, MI);
@@ -1372,13 +1279,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
- LHSLeftoverRegs))
+ LHSLeftoverRegs, MIRBuilder, MRI))
return UnableToLegalize;
LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
- RHSPartRegs, RHSLeftoverRegs))
+ RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
return UnableToLegalize;
// We now have the LHS and RHS of the compare split into narrow-type
@@ -1532,7 +1439,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changingInstr(MI);
SmallVector<Register, 2> SrcRegs, DstRegs;
unsigned NumParts = SizeOp0 / NarrowSize;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
+ MIRBuilder, MRI);
for (unsigned i = 0; i < NumParts; ++i) {
auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
@@ -3981,7 +3889,8 @@ LegalizerHelper::fewerElementsVectorMultiEltType(
MI.getOperand(UseIdx));
} else {
SmallVector<Register, 8> SplitPieces;
- extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
+ extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
+ MRI);
for (auto Reg : SplitPieces)
InputOpsPieces[UseNo].push_back(Reg);
}
@@ -4037,7 +3946,8 @@ LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
UseIdx += 2, ++UseNo) {
MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
- extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
+ extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
+ MIRBuilder, MRI);
}
// Build PHIs with fewer elements.
@@ -4306,7 +4216,7 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
} else {
if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
- NarrowLeftoverRegs)) {
+ NarrowLeftoverRegs, MIRBuilder, MRI)) {
NumParts = NarrowRegs.size();
NumLeftover = NarrowLeftoverRegs.size();
}
@@ -4547,8 +4457,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
unsigned NewElts = NarrowTy.getNumElements();
SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
- extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
- extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
+ extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
+ extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
SplitSrc2Regs[1]};
@@ -4682,7 +4592,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
: SrcTy.getNumElements();
- extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
+ extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
if (NarrowTy.isScalar()) {
if (DstTy != NarrowTy)
return UnableToLegalize; // FIXME: handle implicit extensions.
@@ -4753,7 +4663,8 @@ LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
SmallVector<Register> SplitSrcs;
// Split the sources into NarrowTy size pieces.
extractParts(SrcReg, NarrowTy,
- SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
+ SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
+ MIRBuilder, MRI);
// We're going to do a tree reduction using vector operations until we have
// one NarrowTy size value left.
while (SplitSrcs.size() > 1) {
@@ -5392,8 +5303,10 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
LLT LeftoverTy, DummyTy;
SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
- extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
- extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
+ extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
+ MIRBuilder, MRI);
+ extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
+ MRI);
int NarrowParts = Src1Regs.size();
for (int I = 0, E = Src1Left.size(); I != E; ++I) {
@@ -5451,8 +5364,8 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
SmallVector<Register, 2> Src1Parts, Src2Parts;
SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
- extractParts(Src1, NarrowTy, NumParts, Src1Parts);
- extractParts(Src2, NarrowTy, NumParts, Src2Parts);
+ extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
+ extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
// Take only high half of registers if this is high mul.
@@ -5504,7 +5417,8 @@ LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
SmallVector<Register, 2> SrcRegs, DstRegs;
SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
+ MIRBuilder, MRI);
Register OpReg = MI.getOperand(0).getReg();
uint64_t OpStart = MI.getOperand(2).getImm();
@@ -5566,7 +5480,7 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
LLT LeftoverTy;
extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
- LeftoverRegs);
+ LeftoverRegs, MIRBuilder, MRI);
for (Register Reg : LeftoverRegs)
SrcRegs.push_back(Reg);
@@ -5651,12 +5565,12 @@ LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
LLT LeftoverTy;
if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
- Src0Regs, Src0LeftoverRegs))
+ Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
return UnableToLegalize;
LLT Unused;
if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
- Src1Regs, Src1LeftoverRegs))
+ Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
llvm_unreachable("inconsistent extractParts result");
for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
@@ -5719,12 +5633,12 @@ LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
LLT LeftoverTy;
if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
- Src1Regs, Src1LeftoverRegs))
+ Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
return UnableToLegalize;
LLT Unused;
if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
- Src2Regs, Src2LeftoverRegs))
+ Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
llvm_unreachable("inconsistent extractParts result");
for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
@@ -6220,7 +6134,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
// First, split the source into two smaller vectors.
SmallVector<Register, 2> SplitSrcs;
- extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
+ extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
// Truncate the splits into intermediate narrower elements.
LLT InterTy;
@@ -6960,7 +6874,7 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
int64_t IdxVal;
if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
SmallVector<Register, 8> SrcRegs;
- extractParts(SrcVec, EltTy, NumElts, SrcRegs);
+ extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
if (InsertVal) {
SrcRegs[IdxVal] = MI.getOperand(2).getReg();
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index eaf829f562b2d..83d118a73bbe7 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -476,6 +477,107 @@ Register llvm::getSrcRegIgnoringCopies(Register Reg,
return DefSrcReg ? DefSrcReg->Reg : Register();
}
+void llvm::extractParts(Register Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) {
+ for (int i = 0; i < NumParts; ++i)
+ VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+bool llvm::extractParts(Register Reg, LLT RegTy, LLT MainTy, LLT &LeftoverTy,
+ SmallVectorImpl<Register> &VRegs,
+ SmallVectorImpl<Register> &LeftoverRegs,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) {
+ assert(!LeftoverTy.isValid() && "this is an out argument");
+
+ unsigned RegSize = RegTy.getSizeInBits();
+ unsigned MainSize = MainTy.getSizeInBits();
+ unsigned NumParts = RegSize / MainSize;
+ unsigned LeftoverSize = RegSize - NumParts * MainSize;
+
+ // Use an unmerge when possible.
+ if (LeftoverSize == 0) {
+ for (unsigned I = 0; I < NumParts; ++I)
+ VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+ return true;
+ }
+
+ // Perform irregular split. Leftover is last element of RegPieces.
+ if (MainTy.isVector()) {
+ SmallVector<Register, 8> RegPieces;
+ extractVectorParts(Reg, MainTy.getNumElements(), RegPieces, MIRBuilder,
+ MRI);
+ for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
+ VRegs.push_back(RegPieces[i]);
+ LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
+ LeftoverTy = MRI.getType(LeftoverRegs[0]);
+ return true;
+ }
+
+ LeftoverTy = LLT::scalar(LeftoverSize);
+ // For irregular sizes, extract the individual parts.
+ for (unsigned I = 0; I != NumParts; ++I) {
+ Register NewReg = MRI.createGenericVirtualRegister(MainTy);
+ VRegs.push_back(NewReg);
+ MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
+ }
+
+ for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
+ Offset += LeftoverSize) {
+ Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
+ LeftoverRegs.push_back(NewReg);
+ MIRBuilder.buildExtract(NewReg, Reg, Offset);
+ }
+
+ return true;
+}
+
+void llvm::extractVectorParts(Register Reg, unsigned NumElts,
+ SmallVectorImpl<Register> &VRegs,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) {
+ LLT RegTy = MRI.getType(Reg);
+ assert(RegTy.isVector() && "Expected a vector type");
+
+ LLT EltTy = RegTy.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ unsigned RegNumElts = RegTy.getNumElements();
+ unsigned LeftoverNumElts = RegNumElts % NumElts;
+ unsigned NumNarrowTyPieces = RegNumElts / NumElts;
+
+ // Perfect split without leftover
+ if (LeftoverNumElts == 0)
+ return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs, MIRBuilder,
+ MRI);
+
+ // Irregular split. Provide direct access to all elements for artifact
+ // combiner using unmerge to elements. Then build vectors with NumElts
+ // elements. Remaining element(s) will be (used to build vector) Leftover.
+ SmallVector<Register, 8> Elts;
+ extractParts(Reg, EltTy, RegNumElts, Elts, MIRBuilder, MRI);
+
+ unsigned Offset = 0;
+ // Requested sub-vectors of NarrowTy.
+ for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
+ VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
+ }
+
+ // Leftover element(s).
+ if (LeftoverNumElts == 1) {
+ VRegs.push_back(Elts[Offset]);
+ } else {
+ LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
+ ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
+ VRegs.push_back(
+ MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
+ }
+}
+
MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
const MachineRegisterInfo &MRI) {
MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 71632718857b9..cb04eec19be58 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
>From 6090fb33c8b535014978f15dc381cf22317d1e1e Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Thu, 14 Dec 2023 15:33:38 +0000
Subject: [PATCH 3/3] [GlobalISel] Refactor extractParts()
Try to use unmerge when doing irregular splits where
possible, falling back to extracting elements otherwise.
---
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 41 ++++++++++
.../GISel/AArch64PreLegalizerCombiner.cpp | 74 +++++++++----------
.../GlobalISel/legalize-shuffle-vector.mir | 10 ++-
.../GlobalISel/legalize-store-global.mir | 36 ++++-----
4 files changed, 96 insertions(+), 65 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 83d118a73bbe7..2523fdd732eee 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -506,6 +506,47 @@ bool llvm::extractParts(Register Reg, LLT RegTy, LLT MainTy, LLT &LeftoverTy,
return true;
}
+  // Try to use unmerge for irregular vector splits where possible. E.g.
+  // splitting <6 x s32> into <4 x s32> leaves <2 x s32>: unmerge the source
+  // into three <2 x s32> pieces and merge the first two back into a MainTy.
+  if (RegTy.isVector() && MainTy.isVector()) {
+    unsigned RegNumElts = RegTy.getNumElements();
+    unsigned MainNumElts = MainTy.getNumElements();
+    unsigned LeftoverNumElts = RegNumElts % MainNumElts;
+    // Test LeftoverNumElts > 1 first: it is 0 when the element counts divide
+    // evenly but the scalar sizes differ, and the modulo checks below would
+    // then divide by zero.
+    if (LeftoverNumElts > 1 &&
+        RegTy.getScalarSizeInBits() == MainTy.getScalarSizeInBits() &&
+        MainNumElts % LeftoverNumElts == 0 &&
+        RegNumElts % LeftoverNumElts == 0) {
+      // Reuse the source element type (as extractVectorParts does) rather
+      // than rebuilding a scalar from its bit width.
+      LeftoverTy = LLT::fixed_vector(LeftoverNumElts, RegTy.getElementType());
+
+      // Unmerge Reg into LeftoverTy-sized vectors.
+      SmallVector<Register, 4> UnmergeValues;
+      extractParts(Reg, LeftoverTy, RegNumElts / LeftoverNumElts, UnmergeValues,
+                   MIRBuilder, MRI);
+
+      // Number of LeftoverTy pieces that make up one MainTy.
+      unsigned LeftoverPerMain = MainNumElts / LeftoverNumElts;
+
+      // Every piece except the last is regrouped into MainTy values; the
+      // last piece holds exactly the LeftoverNumElts remaining elements.
+      ArrayRef<Register> Pieces(UnmergeValues);
+      for (unsigned I = 0, E = Pieces.size() - 1; I < E; I += LeftoverPerMain) {
+        VRegs.push_back(
+            MIRBuilder
+                .buildMergeLikeInstr(MainTy, Pieces.slice(I, LeftoverPerMain))
+                .getReg(0));
+      }
+
+      // Populate LeftoverRegs with the single leftover piece.
+      LeftoverRegs.push_back(Pieces.back());
+      return true;
+    }
+  }
// Perform irregular split. Leftover is last element of RegPieces.
if (MainTy.isVector()) {
SmallVector<Register, 8> RegPieces;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index cb04eec19be58..e05d2a1880c1d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -336,53 +336,45 @@ void applyExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
SmallVector<Register, 4> Ext1UnmergeReg;
SmallVector<Register, 4> Ext2UnmergeReg;
if (SrcTy.getNumElements() % 16 != 0) {
- // Unmerge source to v8i8, append a new v8i8 of 0s and the merge to v16s
- SmallVector<Register, 4> PadUnmergeDstReg1;
- SmallVector<Register, 4> PadUnmergeDstReg2;
- unsigned NumOfVec = SrcTy.getNumElements() / 8;
-
- // Unmerge the source to v8i8
- MachineInstr *PadUnmerge1 =
- Builder.buildUnmerge(LLT::fixed_vector(8, 8), Ext1SrcReg);
- MachineInstr *PadUnmerge2 =
- Builder.buildUnmerge(LLT::fixed_vector(8, 8), Ext2SrcReg);
- for (unsigned i = 0; i < NumOfVec; i++) {
- PadUnmergeDstReg1.push_back(PadUnmerge1->getOperand(i).getReg());
- PadUnmergeDstReg2.push_back(PadUnmerge2->getOperand(i).getReg());
+ SmallVector<Register, 1> Leftover1;
+ SmallVector<Register, 1> Leftover2;
+
+ // Split the elements into v16i8 and v8i8
+ LLT MainTy = LLT::fixed_vector(16, 8);
+ LLT LeftoverTy1, LeftoverTy2;
+ if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
+ LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
+ MRI)) ||
+ (!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
+ LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
+ MRI))) {
+ llvm_unreachable("Unable to split this vector properly");
}
- // Pad the vectors with a v8i8 constant of 0s
+ // Pad the leftover v8i8 vector with a v8i8 register of zeros
MachineInstr *v8Zeroes =
Builder.buildConstant(LLT::fixed_vector(8, 8), 0);
- PadUnmergeDstReg1.push_back(v8Zeroes->getOperand(0).getReg());
- PadUnmergeDstReg2.push_back(v8Zeroes->getOperand(0).getReg());
-
- // Merge them all back to v16i8
- NumOfVec = (NumOfVec + 1) / 2;
- for (unsigned i = 0; i < NumOfVec; i++) {
- Ext1UnmergeReg.push_back(
- Builder
- .buildMergeLikeInstr(
- LLT::fixed_vector(16, 8),
- {PadUnmergeDstReg1[i * 2], PadUnmergeDstReg1[(i * 2) + 1]})
- .getReg(0));
- Ext2UnmergeReg.push_back(
- Builder
- .buildMergeLikeInstr(
- LLT::fixed_vector(16, 8),
- {PadUnmergeDstReg2[i * 2], PadUnmergeDstReg2[(i * 2) + 1]})
- .getReg(0));
- }
+ Leftover1.push_back(v8Zeroes->getOperand(0).getReg());
+ Leftover2.push_back(v8Zeroes->getOperand(0).getReg());
+
+ Ext1UnmergeReg.push_back(
+ Builder
+ .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
+ {Leftover1[0], Leftover1[1]})
+ .getReg(0));
+ Ext2UnmergeReg.push_back(
+ Builder
+ .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
+ {Leftover2[0], Leftover2[1]})
+ .getReg(0));
+
} else {
// Unmerge the source vectors to v16i8
- MachineInstr *Ext1Unmerge =
- Builder.buildUnmerge(LLT::fixed_vector(16, 8), Ext1SrcReg);
- MachineInstr *Ext2Unmerge =
- Builder.buildUnmerge(LLT::fixed_vector(16, 8), Ext2SrcReg);
- for (unsigned i = 0, e = SrcTy.getNumElements() / 16; i < e; i++) {
- Ext1UnmergeReg.push_back(Ext1Unmerge->getOperand(i).getReg());
- Ext2UnmergeReg.push_back(Ext2Unmerge->getOperand(i).getReg());
- }
+ unsigned SrcNumElts = SrcTy.getNumElements();
+ extractParts(Ext1SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
+ Ext1UnmergeReg, Builder, MRI);
+ extractParts(Ext2SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
+ Ext2UnmergeReg, Builder, MRI);
}
// Build the UDOT instructions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index b1d0ddd098150..07946388590e2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -460,12 +460,14 @@ body: |
; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C3]](s64)
; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF1]](<4 x s32>), [[C]](s64)
; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF1]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC4]](s32), [[EVEC5]](s32)
- ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY8]](p0) :: (store (<4 x s32>), align 32)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC2]](s32), [[EVEC3]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC4]](s32), [[EVEC5]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s32>), [[BUILD_VECTOR3]](<2 x s32>)
+ ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY8]](p0) :: (store (<4 x s32>), align 32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
- ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<2 x s32>), [[PTR_ADD]](p0) :: (store (<2 x s32>) into unknown-address + 16, align 16)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<2 x s32>), [[PTR_ADD]](p0) :: (store (<2 x s32>) into unknown-address + 16, align 16)
; CHECK-NEXT: RET_ReallyLR
%3:_(s32) = COPY $s0
%4:_(s32) = COPY $s1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index c2fdbff77868f..75dc5d4dc593f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -6827,13 +6827,12 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
- ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>)
+ ; SI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
;
; CI-LABEL: name: test_store_global_v12s16_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
@@ -6841,13 +6840,12 @@ body: |
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
- ; CI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
- ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>)
+ ; CI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
;
; VI-LABEL: name: test_store_global_v12s16_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
@@ -6855,13 +6853,12 @@ body: |
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
- ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>)
+ ; VI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v12s16_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
@@ -6869,13 +6866,12 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
- ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>)
+ ; GFX9-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<12 x s16>) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1)
More information about the llvm-commits
mailing list