[PATCH] D128196: [AMDGPU] GFX11: remove ShaderType from ds_ordered_count offset field

Thu Jun 23 06:35:14 PDT 2022

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG971fa4b1961b: [AMDGPU] GFX11: remove ShaderType from ds_ordered_count offset field (authored by rdomingu, committed by foad).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128196/new/

https://reviews.llvm.org/D128196

Files:
  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
@@ -25,7 +25,7 @@
 ; FUNC-LABEL: {{^}}ds_ordered_add_ps:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
   %val = call i32 at llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
@@ -36,7 +36,7 @@
 ; FUNC-LABEL: {{^}}ds_ordered_add_vs:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
   %val = call i32 at llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
@@ -47,7 +47,7 @@
 ; FUNC-LABEL: {{^}}ds_ordered_add_gs:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
   %val = call i32 at llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7113,12 +7113,14 @@
     unsigned ShaderType =
         SIInstrInfo::getDSShaderTypeValue(DAG.getMachineFunction());
     unsigned Offset0 = OrderedCountIndex << 2;
-    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
-                       (Instruction << 4);
+    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
 
     if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
       Offset1 |= (CountDw - 1) << 6;
 
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX11)
+      Offset1 |= ShaderType << 2;
+
     unsigned Offset = Offset0 | (Offset1 << 8);
 
     SDValue Ops[] = {
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1328,12 +1328,14 @@
   unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF);
 
   unsigned Offset0 = OrderedCountIndex << 2;
-  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
-                     (Instruction << 4);
+  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
 
   if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
     Offset1 |= (CountDw - 1) << 6;
 
+  if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
+    Offset1 |= ShaderType << 2;
+
   unsigned Offset = Offset0 | (Offset1 << 8);
 
   Register M0Val = MI.getOperand(2).getReg();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D128196.439377.patch
Type: text/x-patch
Size: 3510 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220623/8a1ec020/attachment.bin>