[llvm] AMDGPU: Extract lambda used in foldImmediate into a helper function (PR #127484)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 20:25:37 PST 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127484
>From 33506ad7dbbfbaeab22a06ecdc95a2ca87f41293 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 17 Feb 2025 17:12:22 +0700
Subject: [PATCH 1/2] AMDGPU: Extract lambda used in foldImmediate into a
helper function
It was also too permissive for a more general utility; only return
the original immediate if there is no subregister.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 59 ++++++++++++++++----------
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9 ++++
2 files changed, 45 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4ee5ebd7681b8..07addb38b8711 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
}
}
+std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm,
+ unsigned SubRegIndex) {
+ switch (SubRegIndex) {
+ case AMDGPU::NoSubRegister:
+ return Imm;
+ case AMDGPU::sub0:
+ return Lo_32(Imm);
+ case AMDGPU::sub1:
+ return Hi_32(Imm);
+ case AMDGPU::lo16:
+ return SignExtend64<16>(Imm);
+ case AMDGPU::hi16:
+ return SignExtend64<16>(Imm >> 16);
+ case AMDGPU::sub1_lo16:
+ return SignExtend64<16>(Imm >> 32);
+ case AMDGPU::sub1_hi16:
+ return SignExtend64<16>(Imm >> 48);
+ default:
+ return std::nullopt;
+ }
+
+ llvm_unreachable("covered subregister switch");
+}
+
bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Register Reg, MachineRegisterInfo *MRI) const {
if (!MRI->hasOneNonDBGUse(Reg))
@@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (!getConstValDefinedInReg(DefMI, Reg, Imm))
return false;
- auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t {
- switch (UseOp.getSubReg()) {
- default:
- return Imm;
- case AMDGPU::sub0:
- return Lo_32(Imm);
- case AMDGPU::sub1:
- return Hi_32(Imm);
- case AMDGPU::lo16:
- return SignExtend64<16>(Imm);
- case AMDGPU::hi16:
- return SignExtend64<16>(Imm >> 16);
- case AMDGPU::sub1_lo16:
- return SignExtend64<16>(Imm >> 32);
- case AMDGPU::sub1_hi16:
- return SignExtend64<16>(Imm >> 48);
- }
- };
-
assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");
unsigned Opc = UseMI.getOpcode();
@@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
: AMDGPU::V_MOV_B32_e32
: Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
: AMDGPU::S_MOV_B32;
- APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)),
+
+ std::optional<int64_t> SubRegImm =
+ extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg());
+
+ APInt Imm(Is64Bit ? 64 : 32, *SubRegImm,
/*isSigned=*/true, /*implicitTrunc=*/true);
if (RI.isAGPR(*MRI, DstReg)) {
@@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
return false;
- const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
+ const std::optional<int64_t> SubRegImm = extractSubregFromImm(
+ Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg());
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
@@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
- Src1->ChangeToImmediate(Imm);
+ Src1->ChangeToImmediate(*SubRegImm);
removeModOperands(UseMI);
UseMI.setDesc(get(NewOpc));
@@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ const std::optional<int64_t> SubRegImm =
+ extractSubregFromImm(Imm, Src2->getSubReg());
+
// ChangingToImmediate adds Src2 back to the instruction.
- Src2->ChangeToImmediate(getImmFor(*Src2));
+ Src2->ChangeToImmediate(*SubRegImm);
// These come before src2.
removeModOperands(UseMI);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index ddd15e1766f70..06dbdf65e458f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
void removeModOperands(MachineInstr &MI) const;
+ /// Return the extracted immediate value in a subregister use from a constant
+ /// materialized in a super register.
+ ///
+ /// e.g. %imm = S_MOV_B64 K[0:63]
+ /// USE %imm.sub1
+ /// This will return k[32:63]
+ static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
+ unsigned SubRegIndex);
+
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const final;
>From 5d9454b3c5cfb9a4f199a6e4c44f24e0cf18e5c2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 17 Feb 2025 23:56:04 +0700
Subject: [PATCH 2/2] Update llvm/lib/Target/AMDGPU/SIInstrInfo.h
Co-authored-by: Shilei Tian <i at tianshilei.me>
---
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 06dbdf65e458f..79ecc2a657ed0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -406,7 +406,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
///
/// e.g. %imm = S_MOV_B64 K[0:63]
/// USE %imm.sub1
- /// This will return k[32:63]
+ /// This will return K[32:63]
static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
unsigned SubRegIndex);
More information about the llvm-commits
mailing list