[llvm] [AMDGPU][True16] si-fold-operand selecting srcidx for v_mov_b16_t16_e64 (PR #162101)

Wed Oct 15 09:01:58 PDT 2025

https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/162101

>From 63780c264e597a236f6be28b8e0c18f418ec19d7 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 6 Oct 2025 10:56:58 -0400
Subject: [PATCH 1/2] follow up patch: fold V_MOV_B16_t16_e64 only when no modifiers are set

---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp |  2 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp    | 47 ++++++-----------------
 llvm/lib/Target/AMDGPU/SIInstrInfo.h      |  8 +++-
 3 files changed, 19 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 90c828ba8dfab..86b0b9e8f6f29 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -932,7 +932,7 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
   for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
        SubDef && TII.isFoldableCopy(*SubDef);
        SubDef = MRI.getVRegDef(Sub->getReg())) {
-    unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
+    const int SrcIdx = SubDef->getOpcode() == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
     MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);
 
     if (SrcOp.isImm())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 46757cf5fe90c..bab07d3562ceb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3408,10 +3408,9 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
   }
 }
 
-bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
+bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case AMDGPU::V_MOV_B16_t16_e32:
-  case AMDGPU::V_MOV_B16_t16_e64:
   case AMDGPU::V_MOV_B32_e32:
   case AMDGPU::V_MOV_B32_e64:
   case AMDGPU::V_MOV_B64_PSEUDO:
@@ -3428,34 +3427,10 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
   case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
     return true;
-  default:
-    return false;
-  }
-}
-
-unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case AMDGPU::V_MOV_B16_t16_e32:
   case AMDGPU::V_MOV_B16_t16_e64:
-    return 2;
-  case AMDGPU::V_MOV_B32_e32:
-  case AMDGPU::V_MOV_B32_e64:
-  case AMDGPU::V_MOV_B64_PSEUDO:
-  case AMDGPU::V_MOV_B64_e32:
-  case AMDGPU::V_MOV_B64_e64:
-  case AMDGPU::S_MOV_B32:
-  case AMDGPU::S_MOV_B64:
-  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
-  case AMDGPU::COPY:
-  case AMDGPU::WWM_COPY:
-  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
-  case AMDGPU::V_ACCVGPR_READ_B32_e64:
-  case AMDGPU::V_ACCVGPR_MOV_B32:
-  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
-  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
-    return 1;
+    return !hasAnyModifiersSet(MI);
   default:
-    llvm_unreachable("MI is not a foldable copy");
+    return false;
   }
 }
 
@@ -3976,12 +3951,13 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
   return false;
 }
 
-static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
-                           int64_t &Imm, MachineInstr **DefMI = nullptr) {
+bool SIInstrInfo::getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
+                                 int64_t &Imm,
+                                 MachineInstr **DefMI = nullptr) const {
   if (Reg.isPhysical())
     return false;
   auto *Def = MRI.getUniqueVRegDef(Reg);
-  if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
+  if (Def && isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
     Imm = Def->getOperand(1).getImm();
     if (DefMI)
       *DefMI = Def;
@@ -3990,8 +3966,8 @@ static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
   return false;
 }
 
-static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
-                           MachineInstr **DefMI = nullptr) {
+bool SIInstrInfo::getFoldableImm(const MachineOperand *MO, int64_t &Imm,
+                                 MachineInstr **DefMI = nullptr) const {
   if (!MO->isReg())
     return false;
   const MachineFunction *MF = MO->getParent()->getParent()->getParent();
@@ -10643,10 +10619,11 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       return false;
 
     int64_t Mask;
-    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
+    const auto isMask = [&Mask, SrcSize,
+                         this](const MachineOperand *MO) -> bool {
       if (MO->isImm())
         Mask = MO->getImm();
-      else if (!getFoldableImm(MO, Mask))
+      else if (!this->getFoldableImm(MO, Mask))
         return false;
       Mask &= maxUIntN(SrcSize);
       return isPowerOf2_64(Mask);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index cc59acf1ebd94..f16e0738872cb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -416,8 +416,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                   const MachineInstr &MIb) const override;
 
-  static bool isFoldableCopy(const MachineInstr &MI);
-  static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
+  bool isFoldableCopy(const MachineInstr &MI) const;
+
+  bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
+                      int64_t &Imm, MachineInstr **DefMI) const;
+  bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
+                      MachineInstr **DefMI) const;
 
   void removeModOperands(MachineInstr &MI) const;
 

>From ae234342342b3953358e27a0de0dd9e39c6650e6 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 15 Oct 2025 12:01:18 -0400
Subject: [PATCH 2/2] convert more to static: isFoldableCopy, hasModifiersSet, hasAnyModifiersSet, getNamedOperand

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 24 +++++++++++-------------
 llvm/lib/Target/AMDGPU/SIInstrInfo.h   | 20 ++++++++------------
 2 files changed, 19 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index bab07d3562ceb..0a8cedb3ff3d8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3408,7 +3408,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
   }
 }
 
-bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
+bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   case AMDGPU::V_MOV_B16_t16_e32:
   case AMDGPU::V_MOV_B32_e32:
@@ -3951,13 +3951,12 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
   return false;
 }
 
-bool SIInstrInfo::getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
-                                 int64_t &Imm,
-                                 MachineInstr **DefMI = nullptr) const {
+static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
+                           int64_t &Imm, MachineInstr **DefMI = nullptr) {
   if (Reg.isPhysical())
     return false;
   auto *Def = MRI.getUniqueVRegDef(Reg);
-  if (Def && isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
+  if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
     Imm = Def->getOperand(1).getImm();
     if (DefMI)
       *DefMI = Def;
@@ -3966,8 +3965,8 @@ bool SIInstrInfo::getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
   return false;
 }
 
-bool SIInstrInfo::getFoldableImm(const MachineOperand *MO, int64_t &Imm,
-                                 MachineInstr **DefMI = nullptr) const {
+static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
+                           MachineInstr **DefMI = nullptr) {
   if (!MO->isReg())
     return false;
   const MachineFunction *MF = MO->getParent()->getParent()->getParent();
@@ -4691,12 +4690,12 @@ bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
 }
 
 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
-                                  AMDGPU::OpName OpName) const {
+                                  AMDGPU::OpName OpName) {
   const MachineOperand *Mods = getNamedOperand(MI, OpName);
   return Mods && Mods->getImm();
 }
 
-bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
+bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) {
   return any_of(ModifierOpNames,
                 [&](AMDGPU::OpName Name) { return hasModifiersSet(MI, Name); });
 }
@@ -9338,7 +9337,7 @@ Register SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
 }
 
 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
-                                             AMDGPU::OpName OperandName) const {
+                                             AMDGPU::OpName OperandName) {
   if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
     return nullptr;
 
@@ -10619,11 +10618,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       return false;
 
     int64_t Mask;
-    const auto isMask = [&Mask, SrcSize,
-                         this](const MachineOperand *MO) -> bool {
+    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
       if (MO->isImm())
         Mask = MO->getImm();
-      else if (!this->getFoldableImm(MO, Mask))
+      else if (!getFoldableImm(MO, Mask))
         return false;
       Mask &= maxUIntN(SrcSize);
       return isPowerOf2_64(Mask);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index f16e0738872cb..9924dfc8b3b33 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -416,12 +416,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                   const MachineInstr &MIb) const override;
 
-  bool isFoldableCopy(const MachineInstr &MI) const;
-
-  bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
-                      int64_t &Imm, MachineInstr **DefMI) const;
-  bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
-                      MachineInstr **DefMI) const;
+  static bool isFoldableCopy(const MachineInstr &MI);
+  static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
 
   void removeModOperands(MachineInstr &MI) const;
 
@@ -1258,8 +1254,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   ///  e.g. src[012]_mod, omod, clamp.
   bool hasModifiers(unsigned Opcode) const;
 
-  bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const;
-  bool hasAnyModifiersSet(const MachineInstr &MI) const;
+  static bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName);
+  static bool hasAnyModifiersSet(const MachineInstr &MI);
 
   bool canShrink(const MachineInstr &MI,
                  const MachineRegisterInfo &MRI) const;
@@ -1435,12 +1431,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   /// Returns the operand named \p Op.  If \p MI does not have an
   /// operand named \c Op, this function returns nullptr.
   LLVM_READONLY
-  MachineOperand *getNamedOperand(MachineInstr &MI,
-                                  AMDGPU::OpName OperandName) const;
+  static MachineOperand *getNamedOperand(MachineInstr &MI,
+                                         AMDGPU::OpName OperandName);
 
   LLVM_READONLY
-  const MachineOperand *getNamedOperand(const MachineInstr &MI,
-                                        AMDGPU::OpName OperandName) const {
+  static const MachineOperand *getNamedOperand(const MachineInstr &MI,
+                                               AMDGPU::OpName OperandName) {
     return getNamedOperand(const_cast<MachineInstr &>(MI), OperandName);
   }