[llvm] cde2f33 - [AMDGPU] Introduce never uniform bit field in tablegen

Tue Feb 7 22:17:27 PST 2023

Author: Yashwant Singh
Date: 2023-02-08T11:45:48+05:30
New Revision: cde2f330b36fc36760329be1d3c52e92da400663

URL: https://github.com/llvm/llvm-project/commit/cde2f330b36fc36760329be1d3c52e92da400663
DIFF: https://github.com/llvm/llvm-project/commit/cde2f330b36fc36760329be1d3c52e92da400663.diff

LOG: [AMDGPU] Introduce never uniform bit field in tablegen

IsNeverUniform can be set to 1 to mark instructions which are
inherently never-uniform/divergent. Enabling this bit in
Writelane instruction for now. To be extended to all required
instructions.

Reviewed By: arsenm, sameerds, #amdgpu

Differential Revision: https://reviews.llvm.org/D143154

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIDefines.h
    llvm/lib/Target/AMDGPU/SIInstrFormats.td
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
    llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 142fb871d1c1a..432d522a37863 100644

--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -133,6 +133,9 @@ enum : uint64_t {
 
   // Whether tied sources will be read.
   TiedSourceNotRead = UINT64_C(1) << 60,
+
+  // Is never uniform.
+  IsNeverUniform = UINT64_C(1) << 61,
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index d86d4e6598036..f674777724ebb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -153,6 +153,9 @@ class InstSI <dag outs, dag ins, string asm = "",
   // This bit indicates that tied source will not be read.
   field bit TiedSourceNotRead = 0;
 
+  // This bit indicates that the instruction is never-uniform/divergent
+  field bit IsNeverUniform = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
   let TSFlags{1} = VALU;
@@ -234,6 +237,8 @@ class InstSI <dag outs, dag ins, string asm = "",
 
   let TSFlags{60} = TiedSourceNotRead;
 
+  let TSFlags{61} = IsNeverUniform;
+  
   let SchedRW = [Write32Bit];
 
   let AsmVariantName = AMDGPUAsmVariants.Default;

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 55f0b65d5e1da..4955c794f53ba 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8414,7 +8414,14 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
 
 InstructionUniformity
 SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
+
+  if (isNeverUniform(MI))
+    return InstructionUniformity::NeverUniform;
+
   unsigned opcode = MI.getOpcode();
+  if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
+    return InstructionUniformity::AlwaysUniform;
+
   if (MI.isCopy()) {
     const MachineOperand &srcOp = MI.getOperand(1);
     if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
@@ -8456,12 +8463,6 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
     return InstructionUniformity::Default;
   }
 
-  if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
-    return InstructionUniformity::AlwaysUniform;
-
-  if (opcode == AMDGPU::V_WRITELANE_B32)
-    return InstructionUniformity::NeverUniform;
-
   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
   const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();
 

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 8de8d456be2f0..ce02b250084e6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -781,6 +781,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
   }
 
+  static bool isNeverUniform(const MachineInstr &MI){
+    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
+  }
+
   static bool doesNotReadTiedSource(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
   }

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 10a7f0661f23b..d8d7682645032 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -764,11 +764,10 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag,
 let isConvergent = 1, Uses = []<Register> in {
 def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
   [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
-
-let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
 def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
   [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
-} // End $vdst = $vdst_in, DisableEncoding $vdst_in
+} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
 } // End isConvergent = 1
 
 let isReMaterializable = 1 in {

diff  --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
index 35a5a3f110e9c..9d15b8990bad3 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
@@ -1,4 +1,5 @@
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+
 # readlane, readfirstlane is always uniform
 
 ---

diff  --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
index a125441613941..3adcc38ea43dd 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
 # loads from flat non uniform
 ---
 name:            flatloads