[llvm] [AMDGPU][True16] si-fold-operand selecting srcidx for v_mov_b16_t16_e64 (PR #162101)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 14 11:38:54 PDT 2025
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/162101
From 63780c264e597a236f6be28b8e0c18f418ec19d7 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 6 Oct 2025 10:56:58 -0400
Subject: [PATCH] follow up patch
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 2 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 47 ++++++-----------------
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 +++-
3 files changed, 19 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 90c828ba8dfab..86b0b9e8f6f29 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -932,7 +932,7 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
SubDef && TII.isFoldableCopy(*SubDef);
SubDef = MRI.getVRegDef(Sub->getReg())) {
- unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
+ const int SrcIdx = SubDef->getOpcode() == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);
if (SrcOp.isImm())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 46757cf5fe90c..bab07d3562ceb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3408,10 +3408,9 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
}
}
-bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
+bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B16_t16_e32:
- case AMDGPU::V_MOV_B16_t16_e64:
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
@@ -3428,34 +3427,10 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
return true;
- default:
- return false;
- }
-}
-
-unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case AMDGPU::V_MOV_B16_t16_e32:
case AMDGPU::V_MOV_B16_t16_e64:
- return 2;
- case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_B64_PSEUDO:
- case AMDGPU::V_MOV_B64_e32:
- case AMDGPU::V_MOV_B64_e64:
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::S_MOV_B64_IMM_PSEUDO:
- case AMDGPU::COPY:
- case AMDGPU::WWM_COPY:
- case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
- case AMDGPU::V_ACCVGPR_READ_B32_e64:
- case AMDGPU::V_ACCVGPR_MOV_B32:
- case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
- case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
- return 1;
+ return !hasAnyModifiersSet(MI);
default:
- llvm_unreachable("MI is not a foldable copy");
+ return false;
}
}
@@ -3976,12 +3951,13 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
return false;
}
-static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
- int64_t &Imm, MachineInstr **DefMI = nullptr) {
+bool SIInstrInfo::getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
+ int64_t &Imm,
+ MachineInstr **DefMI = nullptr) const {
if (Reg.isPhysical())
return false;
auto *Def = MRI.getUniqueVRegDef(Reg);
- if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
+ if (Def && isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
Imm = Def->getOperand(1).getImm();
if (DefMI)
*DefMI = Def;
@@ -3990,8 +3966,8 @@ static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
return false;
}
-static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
- MachineInstr **DefMI = nullptr) {
+bool SIInstrInfo::getFoldableImm(const MachineOperand *MO, int64_t &Imm,
+ MachineInstr **DefMI = nullptr) const {
if (!MO->isReg())
return false;
const MachineFunction *MF = MO->getParent()->getParent()->getParent();
@@ -10643,10 +10619,11 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return false;
int64_t Mask;
- const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
+ const auto isMask = [&Mask, SrcSize,
+ this](const MachineOperand *MO) -> bool {
if (MO->isImm())
Mask = MO->getImm();
- else if (!getFoldableImm(MO, Mask))
+ else if (!this->getFoldableImm(MO, Mask))
return false;
Mask &= maxUIntN(SrcSize);
return isPowerOf2_64(Mask);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index cc59acf1ebd94..f16e0738872cb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -416,8 +416,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const override;
- static bool isFoldableCopy(const MachineInstr &MI);
- static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
+ bool isFoldableCopy(const MachineInstr &MI) const;
+
+ bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
+ int64_t &Imm, MachineInstr **DefMI) const;
+ bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
+ MachineInstr **DefMI) const;
void removeModOperands(MachineInstr &MI) const;
More information about the llvm-commits mailing list