[llvm] 971fa4b - [AMDGPU] GFX11: remove ShaderType from ds_ordered_count offset field
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 23 06:35:05 PDT 2022
Author: Rodrigo Dominguez
Date: 2022-06-23T14:20:33+01:00
New Revision: 971fa4b1961bcf8013481d0fe1d4864d7adda0f8
URL: https://github.com/llvm/llvm-project/commit/971fa4b1961bcf8013481d0fe1d4864d7adda0f8
DIFF: https://github.com/llvm/llvm-project/commit/971fa4b1961bcf8013481d0fe1d4864d7adda0f8.diff
LOG: [AMDGPU] GFX11: remove ShaderType from ds_ordered_count offset field
In GFX11 ShaderType is determined by the hardware and should no longer
be written into bits[3:2] of the ds_ordered_count offset field.
Differential Revision: https://reviews.llvm.org/D128196
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 18932693c7f9..8fe735c1fcd1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1328,12 +1328,14 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF);
unsigned Offset0 = OrderedCountIndex << 2;
- unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
- (Instruction << 4);
+ unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
Offset1 |= (CountDw - 1) << 6;
+ if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
+ Offset1 |= ShaderType << 2;
+
unsigned Offset = Offset0 | (Offset1 << 8);
Register M0Val = MI.getOperand(2).getReg();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8f17e3a83aca..887a086d421d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7113,12 +7113,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
unsigned ShaderType =
SIInstrInfo::getDSShaderTypeValue(DAG.getMachineFunction());
unsigned Offset0 = OrderedCountIndex << 2;
- unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
- (Instruction << 4);
+ unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
Offset1 |= (CountDw - 1) << 6;
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX11)
+ Offset1 |= ShaderType << 2;
+
unsigned Offset = Offset0 | (Offset1 << 8);
SDValue Ops[] = {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
index 55204c9e06fe..01837b33ac49 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
@@ -25,7 +25,7 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
; FUNC-LABEL: {{^}}ds_ordered_add_ps:
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
%val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
@@ -36,7 +36,7 @@ define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
; FUNC-LABEL: {{^}}ds_ordered_add_vs:
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
%val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
@@ -47,7 +47,7 @@ define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
; FUNC-LABEL: {{^}}ds_ordered_add_gs:
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
%val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
More information about the llvm-commits
mailing list