[llvm] 6881a82 - [AMDGPU] Fix scheduling of exp pos4

Thu Nov 12 11:57:31 PST 2020

Author: Jay Foad
Date: 2020-11-12T19:57:14Z
New Revision: 6881a82e8c31e420b7ec163928c8737812c8bd3b

URL: https://github.com/llvm/llvm-project/commit/6881a82e8c31e420b7ec163928c8737812c8bd3b
DIFF: https://github.com/llvm/llvm-project/commit/6881a82e8c31e420b7ec163928c8737812c8bd3b.diff

LOG: [AMDGPU] Fix scheduling of exp pos4

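Treat exp pos4 (GFX10+) as a position export in AMDGPUExportClustering by
comparing the export target against the new ET_POS_LAST bound instead of
ET_POS3.

Also fix a similar issue in SIInsertWaitcnts, but I don't think that fix
has any effect in practice.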

Differential Revision: https://reviews.llvm.org/D91290

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
    llvm/lib/Target/AMDGPU/SIDefines.h
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
index 685308b5d128..6494f1daae46 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -33,7 +33,7 @@ static bool isExport(const SUnit &SU) {
 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
   const MachineInstr *MI = SU->getInstr();
   int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
-  return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS3;
+  return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
 }
 
 static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,

diff  --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index b5087d32964d..0abd96dc4607 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -698,8 +698,9 @@ enum Target {
   ET_NULL = 9,
   ET_POS0 = 12,
   ET_POS3 = 15,
-  ET_POS4 = 16, // GFX10+
-  ET_PRIM = 20, // GFX10+
+  ET_POS4 = 16,          // GFX10+
+  ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget
+  ET_PRIM = 20,          // GFX10+
   ET_PARAM0 = 32,
   ET_PARAM31 = 63,
 };

diff  --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 5b4ca5bda247..6f1dfdac2c6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1327,7 +1327,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
     int Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
     if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
       ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
-    else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS3)
+    else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
       ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
     else
       ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index 9a62ca5db089..ede466976a3a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s
 
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
@@ -546,8 +546,8 @@ end:
 ; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
 ; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
 ; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
-; GCN-DAG: v_add_f32_e32 [[Z0:v[0-9]+]]
-; GCN-DAG: v_sub_f32_e32 [[Z1:v[0-9]+]]
+; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
+; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
 ; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
 ; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
 define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
@@ -570,6 +570,18 @@ define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}test_export_pos4_before_param:
+; GFX10: exp pos4
+; GFX10-NOT: s_waitcnt
+; GFX10: exp param0
+define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
+  %z0 = fadd float %x, %y
+  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
+  %z1 = fsub float %y, %x
+  call void @llvm.amdgcn.exp.f32(i32 16, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
 ; GCN: exp pos0
 ; GCN: exp pos1