[llvm] [AMD][GISel] legalize `G_INSERT_SUBVECTOR` (PR #128922)
Maksim Levental via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 09:53:28 PST 2025
https://github.com/makslevental created https://github.com/llvm/llvm-project/pull/128922
None
>From f6b92e0257eb0938de69c3ec5806b080aa3320b4 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental at gmail.com>
Date: Wed, 26 Feb 2025 12:52:50 -0500
Subject: [PATCH] [AMD][GISel] legalize `G_INSERT_SUBVECTOR`
---
.../GlobalISel/GIMatchTableExecutorImpl.h | 4 ++
.../CodeGen/GlobalISel/MachineIRBuilder.h | 10 ++++
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 3 +
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 60 +++++++++++++++++++
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 +
5 files changed, 79 insertions(+)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
index 2c57f2b5aa029..903262b25fa75 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
@@ -175,6 +175,10 @@ bool GIMatchTableExecutor::executeMatchTable(
break;
}
+ // TODO(max): null deref further down but on exactly the call
+ // TODO(max): comment out to see where the actual fail is
+ auto *DefMI = MRI.getVRegDef(MO.getReg());
+ assert(DefMI);
MachineInstr *NewMI;
if (MatcherOpcode == GIM_RecordInsnIgnoreCopies)
NewMI = getDefIgnoringCopies(MO.getReg(), MRI);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 7b0475ac2481d..c88bf44c9b1e0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1381,6 +1381,16 @@ class MachineIRBuilder {
Res, Val, buildConstant(LLT::scalar(VecIdxWidth), Idx));
}
+  /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, \p Elt, \p Idx
+  ///
+  /// The index is materialized with buildConstant using a scalar type whose
+  /// width is the target's vector index type for the current data layout.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildInsertVectorElementConstant(const DstOp &Res,
+                                                       const SrcOp &Val,
+                                                       const SrcOp &Elt,
+                                                       const int Idx) {
+    const auto *Lowering = getMF().getSubtarget().getTargetLowering();
+    const unsigned IdxBits =
+        Lowering->getVectorIdxTy(getDataLayout()).getSizeInBits();
+    // Emit the constant index first, then the insert that consumes it.
+    auto IdxCst = buildConstant(LLT::scalar(IdxBits), Idx);
+    return buildInsertVectorElement(Res, Val, Elt, IdxCst);
+  }
+
/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
///
/// \pre setBasicBlock or setMI must have been called.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d4cb224c35d74..107bb4f63ffe0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -124,6 +124,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
if (isa<GIntrinsic>(MI))
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
+ if (isa<GInsertSubvector>(MI))
+ return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
+ : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
case Legal:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 908d323c7fec9..18747b43ac1cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2131,6 +2131,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
return legalizeExtractVectorElt(MI, MRI, B);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return legalizeInsertVectorElt(MI, MRI, B);
+ case TargetOpcode::G_INSERT_SUBVECTOR:
+ return legalizeInsertSubVector(MI, MRI, B);
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
return legalizeSinCos(MI, MRI, B);
@@ -2828,6 +2830,64 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
return true;
}
+// This lowers a G_INSERT_SUBVECTOR by extracting the individual elements from
+// the small vector and inserting them into the big vector. That is better than
+// the default expansion of doing it via a stack slot. Even though the use of
+// the stack slot would be optimized away afterwards, the stack slot itself
+// remains.
+//
+// When the element type is 16 bits wide, the insertion index is even, and both
+// vectors have an even number of elements (with more than two in the big
+// vector), both vectors are bitcast to 32-bit elements so that a whole 32-bit
+// register is moved per step. Every other case is handled one element at a
+// time.
+//
+// The final value is always written to MI's original destination register
+// before MI is erased, so existing users of that register keep a definition.
+// Always returns true.
+bool AMDGPULegalizerInfo::legalizeInsertSubVector(MachineInstr &MI,
+                                                  MachineRegisterInfo &MRI,
+                                                  MachineIRBuilder &B) const {
+  auto *Insert = cast<GInsertSubvector>(&MI);
+  const Register Dst = MI.getOperand(0).getReg();
+  Register Vec = Insert->getBigVec();
+  const Register Ins = Insert->getSubVec();
+  const uint64_t IdxVal = Insert->getIndexImm();
+
+  const LLT VecVT = MRI.getType(Vec);
+  const LLT InsVT = MRI.getType(Ins);
+  const LLT EltVT = VecVT.getElementType();
+  assert(EltVT == InsVT.getElementType() &&
+         "sub-vector and big vector must share an element type");
+
+  const ElementCount VecVTEC = VecVT.getElementCount();
+  const ElementCount InsVTEC = InsVT.getElementCount();
+  const unsigned VecNumElts = VecVTEC.getKnownMinValue();
+  const unsigned InsNumElts = InsVTEC.getKnownMinValue();
+
+  // The packed path halves both element counts, so both must be even, and the
+  // big vector must still be a vector (more than one s32) after halving.
+  const bool CanPackTo32 = EltVT.getScalarSizeInBits() == 16 &&
+                           IdxVal % 2 == 0 && InsNumElts % 2 == 0 &&
+                           VecNumElts % 2 == 0 && VecNumElts > 2;
+
+  if (CanPackTo32) {
+    // Insert 32-bit registers at a time.
+    const LLT NewVecVT = LLT::vector(VecVTEC.divideCoefficientBy(2), S32);
+    // A two-element sub-vector bitcasts to a plain s32 rather than a vector.
+    const LLT NewInsVT =
+        InsNumElts == 2 ? S32
+                        : LLT::vector(InsVTEC.divideCoefficientBy(2), S32);
+
+    auto VecB = B.buildBitcast(NewVecVT, Vec);
+    auto InsB = B.buildBitcast(NewInsVT, Ins);
+
+    const unsigned NumPairs = InsNumElts / 2;
+    for (unsigned I = 0; I != NumPairs; ++I) {
+      MachineInstrBuilder Elt =
+          InsNumElts == 2
+              ? InsB
+              : B.buildExtractVectorElementConstant(S32, InsB, I);
+      VecB = B.buildInsertVectorElementConstant(NewVecVT, VecB, Elt,
+                                                IdxVal / 2 + I);
+    }
+
+    // Define the original destination; a fresh register here would leave all
+    // users of Dst without a defining instruction once MI is erased.
+    B.buildBitcast(Dst, VecB);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  for (unsigned I = 0; I != InsNumElts; ++I) {
+    auto Elt = B.buildExtractVectorElementConstant(EltVT, Ins, I);
+    // The last insert in the chain writes directly to the original
+    // destination; earlier ones go to fresh registers of the vector type.
+    const bool IsLast = I + 1 == InsNumElts;
+    Vec = B.buildInsertVectorElementConstant(
+               IsLast ? DstOp(Dst) : DstOp(VecVT), Vec, Elt, IdxVal + I)
+              .getReg(0);
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
bool AMDGPULegalizerInfo::legalizeSinCos(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 03b7c36fc450f..78d4d8bd8a3f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -60,6 +60,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
MachineIRBuilder &B) const;
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeInsertSubVector(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
More information about the llvm-commits
mailing list