[llvm] [AMD][GISel] legalize `G_INSERT_SUBVECTOR` (PR #128923)
Maksim Levental via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 09:56:17 PST 2025
https://github.com/makslevental created https://github.com/llvm/llvm-project/pull/128923
None
From db2e65518d70fde089e0d9b08f382ea6951cd15e Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental at gmail.com>
Date: Wed, 26 Feb 2025 12:52:50 -0500
Subject: [PATCH] [AMD][GISel] legalize `G_INSERT_SUBVECTOR`
---
.../CodeGen/GlobalISel/MachineIRBuilder.h | 10 ++++
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 3 +
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 1 +
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 60 +++++++++++++++++++
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 +
5 files changed, 76 insertions(+)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 7b0475ac2481d..c88bf44c9b1e0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1381,6 +1381,16 @@ class MachineIRBuilder {
Res, Val, buildConstant(LLT::scalar(VecIdxWidth), Idx));
}
+  /// Build and insert a vector-element insertion of \p Elt into \p Val at the
+  /// constant position \p Idx, defining \p Res.
+  ///
+  /// The index is materialized with buildConstant as a scalar whose bit width
+  /// is taken from the target lowering's vector index type, and the insertion
+  /// itself is emitted through buildInsertVectorElement.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildInsertVectorElementConstant(const DstOp &Res,
+                                                       const SrcOp &Val,
+                                                       const SrcOp &Elt,
+                                                       const int Idx) {
+    const auto *Lowering = getMF().getSubtarget().getTargetLowering();
+    unsigned IdxBits = Lowering->getVectorIdxTy(getDataLayout()).getSizeInBits();
+    auto IdxCst = buildConstant(LLT::scalar(IdxBits), Idx);
+    return buildInsertVectorElement(Res, Val, Elt, IdxCst);
+  }
+
/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
///
/// \pre setBasicBlock or setMI must have been called.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d4cb224c35d74..107bb4f63ffe0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -124,6 +124,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
if (isa<GIntrinsic>(MI))
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
+ if (isa<GInsertSubvector>(MI))
+ return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
+ : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
case Legal:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 625d556e3ff5e..76e498650d9d8 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -467,6 +467,7 @@ std::optional<DefinitionAndSourceRegister>
llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
Register DefSrcReg = Reg;
auto *DefMI = MRI.getVRegDef(Reg);
+ assert(DefMI && "expected non-null machine instr");
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
if (!DstTy.isValid())
return std::nullopt;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 908d323c7fec9..18747b43ac1cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2131,6 +2131,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
return legalizeExtractVectorElt(MI, MRI, B);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return legalizeInsertVectorElt(MI, MRI, B);
+ case TargetOpcode::G_INSERT_SUBVECTOR:
+ return legalizeInsertSubVector(MI, MRI, B);
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
return legalizeSinCos(MI, MRI, B);
@@ -2828,6 +2830,64 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
return true;
}
+// This lowers a G_INSERT_SUBVECTOR by extracting the individual elements from
+// the small vector and inserting them into the big vector. That is better than
+// the default expansion of doing it via a stack slot. Even though the use of
+// the stack slot would be optimized away afterwards, the stack slot itself
+// remains.
+//
+// When the elements are 16 bits wide and the insertion index and both element
+// counts are even, pairs of elements are moved as single 32-bit values by
+// bitcasting both vectors to 32-bit elements first. Every other case is
+// handled one element at a time.
+//
+// The final vector is written to the destination register of \p MI before
+// \p MI is erased, so existing users of that register keep a definition.
+// Always returns true.
+bool AMDGPULegalizerInfo::legalizeInsertSubVector(MachineInstr &MI,
+                                                  MachineRegisterInfo &MRI,
+                                                  MachineIRBuilder &B) const {
+  GInsertSubvector *Insert = cast<GInsertSubvector>(&MI);
+  Register Dst = MI.getOperand(0).getReg();
+  Register Vec = Insert->getBigVec();
+  Register Ins = Insert->getSubVec();
+  uint64_t IdxVal = Insert->getIndexImm();
+
+  LLT VecVT = MRI.getType(Vec);
+  LLT InsVT = MRI.getType(Ins);
+  LLT EltVT = VecVT.getElementType();
+  assert(EltVT == InsVT.getElementType());
+
+  ElementCount VecVTEC = VecVT.getElementCount();
+  ElementCount InsVTEC = InsVT.getElementCount();
+  auto VecNumElts = VecVTEC.getKnownMinValue();
+  auto InsNumElts = InsVTEC.getKnownMinValue();
+
+  // The packed path halves both element counts, so both must be even for the
+  // bitcasts to preserve size, and the big vector must keep at least two
+  // 32-bit lanes to remain a vector type. Anything else falls through to the
+  // element-wise path below.
+  if (EltVT.getScalarSizeInBits() == 16 && IdxVal % 2 == 0 &&
+      InsNumElts % 2 == 0 && VecNumElts % 2 == 0 && VecNumElts > 2) {
+    // Insert 32-bit registers at a time.
+    LLT NewVecVT = LLT::vector(VecVTEC.divideCoefficientBy(2), S32);
+    // A two-element subvector collapses to a single 32-bit scalar.
+    LLT NewInsVT = InsNumElts == 2
+                       ? S32
+                       : LLT::vector(InsVTEC.divideCoefficientBy(2), S32);
+
+    auto VecB = B.buildBitcast(NewVecVT, Vec);
+    auto InsB = B.buildBitcast(NewInsVT, Ins);
+
+    for (unsigned I = 0; I != InsNumElts / 2; ++I) {
+      MachineInstrBuilder Elt;
+      if (InsNumElts == 2) {
+        Elt = InsB;
+      } else {
+        Elt = B.buildExtractVectorElementConstant(S32, InsB, I);
+      }
+      VecB = B.buildInsertVectorElementConstant(NewVecVT, VecB, Elt,
+                                                IdxVal / 2 + I);
+    }
+
+    // Cast back to the original type directly into the destination register.
+    B.buildBitcast(Dst, VecB);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  for (unsigned I = 0; I != InsNumElts; ++I) {
+    auto Elt = B.buildExtractVectorElementConstant(EltVT, Ins, I);
+    Vec = B.buildInsertVectorElementConstant(VecVT, Vec, Elt, IdxVal + I)
+              .getReg(0);
+  }
+
+  // Forward the fully updated vector to the original destination register.
+  B.buildCopy(Dst, Vec);
+  MI.eraseFromParent();
+  return true;
+}
+
bool AMDGPULegalizerInfo::legalizeSinCos(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 03b7c36fc450f..78d4d8bd8a3f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -60,6 +60,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
MachineIRBuilder &B) const;
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeInsertSubVector(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
More information about the llvm-commits
mailing list