[llvm] 490e348 - AMDGPU: Partially fix machine uniformity for inline asm

Mon Jan 30 11:47:26 PST 2023

Author: Matt Arsenault
Date: 2023-01-30T15:47:18-04:00
New Revision: 490e348e67945a4a7e118709579f17b2c96b40bd

URL: https://github.com/llvm/llvm-project/commit/490e348e67945a4a7e118709579f17b2c96b40bd
DIFF: https://github.com/llvm/llvm-project/commit/490e348e67945a4a7e118709579f17b2c96b40bd.diff

LOG: AMDGPU: Partially fix machine uniformity for inline asm

This was assuming virtual registers only, and asserting on physical.
This was also ignoring AGPRs, and only considering VGPRs.

Reporting the instruction as uniform or not is conceptually wrong,
this should be reported per-operand. An inline asm statement could
include uniform and non-uniform components. This should report
purely for the register defs and ignore the uses.

Fixes asserting on most of the inline asm tests when uniformity
analysis is used.

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/RegisterBankInfo.h
    llvm/lib/CodeGen/RegisterBankInfo.cpp
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/RegisterBankInfo.h
index bba4f1f025a06..2606b04a02060 100644

--- a/llvm/include/llvm/CodeGen/RegisterBankInfo.h
+++ b/llvm/include/llvm/CodeGen/RegisterBankInfo.h
@@ -435,7 +435,7 @@ class RegisterBankInfo {
 
   /// Get the MinimalPhysRegClass for Reg.
   /// \pre Reg is a physical register.
-  const TargetRegisterClass &
+  const TargetRegisterClass *
   getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const;
 
   /// Try to get the mapping of \p MI.

diff  --git a/llvm/lib/CodeGen/RegisterBankInfo.cpp b/llvm/lib/CodeGen/RegisterBankInfo.cpp
index 27ed17b9f4f61..801b82e596739 100644
--- a/llvm/lib/CodeGen/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -79,13 +79,13 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
 const RegisterBank *
 RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
                              const TargetRegisterInfo &TRI) const {
-  if (Reg.isPhysical()) {
+  if (!Reg.isVirtual()) {
     // FIXME: This was probably a copy to a virtual register that does have a
     // type we could use.
-    return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT());
+    const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg, TRI);
+    return RC ? &getRegBankFromRegClass(*RC, LLT()) : nullptr;
   }
 
-  assert(Reg && "NoRegister does not have a register bank");
   const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
   if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
     return RB;
@@ -94,16 +94,16 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
   return nullptr;
 }
 
-const TargetRegisterClass &
+const TargetRegisterClass *
 RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
                                          const TargetRegisterInfo &TRI) const {
   assert(Reg.isPhysical() && "Reg must be a physreg");
   const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
   if (RegRCIt != PhysRegMinimalRCs.end())
-    return *RegRCIt->second;
-  const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg);
+    return RegRCIt->second;
+  const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClassLLT(Reg, LLT());
   PhysRegMinimalRCs[Reg] = PhysRC;
-  return *PhysRC;
+  return PhysRC;
 }
 
 const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
@@ -498,7 +498,7 @@ unsigned RegisterBankInfo::getSizeInBits(Register Reg,
     // Instead, we need to access a register class that contains Reg and
     // get the size of that register class.
     // Because this is expensive, we'll cache the register class by calling
-    auto *RC = &getMinimalPhysRegClass(Reg, TRI);
+    auto *RC = getMinimalPhysRegClass(Reg, TRI);
     assert(RC && "Expecting Register class");
     return TRI.getRegSizeInBits(*RC);
   }

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 2b1646af175f9..4177a8307ad8e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
 
 #include "AMDGPUCallLowering.h"
+#include "AMDGPURegisterBankInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "SIFrameLowering.h"
 #include "SIISelLowering.h"
@@ -51,7 +52,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
   std::unique_ptr<LegalizerInfo> Legalizer;
-  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+  std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
 
 protected:
   // Basic subtarget description.
@@ -249,7 +250,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return Legalizer.get();
   }
 
-  const RegisterBankInfo *getRegBankInfo() const override {
+  const AMDGPURegisterBankInfo *getRegBankInfo() const override {
     return RegBankInfo.get();
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7dfcca63542c0..e3f06fe8c3b27 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8403,6 +8403,22 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
 
 InstructionUniformity
 SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
+  unsigned opcode = MI.getOpcode();
+  if (MI.isCopy()) {
+    const MachineOperand &srcOp = MI.getOperand(1);
+    if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
+      const TargetRegisterClass *regClass =
+          RI.getPhysRegBaseClass(srcOp.getReg());
+      return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
+                                      : InstructionUniformity::NeverUniform;
+    }
+    return InstructionUniformity::Default;
+  }
+
+  // GMIR handling
+  if (MI.isPreISelOpcode())
+    return SIInstrInfo::getGenericInstructionUniformity(MI);
+
   // Atomics are divergent because they are executed sequentially: when an
   // atomic operation refers to the same address in each thread, then each
   // thread after the first sees the value written by the previous thread as
@@ -8429,49 +8445,32 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
     return InstructionUniformity::Default;
   }
 
-  unsigned opcode = MI.getOpcode();
-  if (opcode == AMDGPU::COPY) {
-    const MachineOperand &srcOp = MI.getOperand(1);
-    if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
-      const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
-      return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
-                                      : InstructionUniformity::NeverUniform;
-    }
-    return InstructionUniformity::Default;
-  }
-  if (opcode == AMDGPU::INLINEASM || opcode == AMDGPU::INLINEASM_BR) {
-    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
-    for (auto &op : MI.operands()) {
-      if (!op.isReg() || !op.isDef())
-        continue;
-      auto *RC = MRI.getRegClass(op.getReg());
-      if (!RC || RI.isDivergentRegClass(RC))
-        return InstructionUniformity::NeverUniform;
-    }
-    return InstructionUniformity::AlwaysUniform;
-  }
   if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
     return InstructionUniformity::AlwaysUniform;
 
   if (opcode == AMDGPU::V_WRITELANE_B32)
     return InstructionUniformity::NeverUniform;
 
-  // GMIR handling
-  if (SIInstrInfo::isGenericOpcode(opcode))
-    return SIInstrInfo::getGenericInstructionUniformity(MI);
-
-  // Handling $vpgr reads
-  for (auto srcOp : MI.operands()) {
-    if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
-      const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();
+
+  // FIXME: It's conceptually broken to report this for an instruction, and not
+  // a specific def operand. For inline asm in particular, there could be mixed
+  // uniform and divergent results.
+  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+    const MachineOperand &SrcOp = MI.getOperand(I);
+    if (!SrcOp.isReg())
+      continue;
 
-      // If the class is missing it's an unallocatable scalar of some kind.
-      if (!regClass)
-        continue;
+    Register Reg = SrcOp.getReg();
+    if (!Reg || !SrcOp.readsReg())
+      continue;
 
-      if (RI.isVGPRClass(regClass))
-        return InstructionUniformity::NeverUniform;
-    }
+    // If RegBank is null, this is unassigned or an unallocatable special
+    // register, which are all scalars.
+    const RegisterBank *RegBank = RBI->getRegBank(Reg, MRI, RI);
+    if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
+      return InstructionUniformity::NeverUniform;
   }
 
   // TODO: Uniformity check condtions above can be rearranged for more