[llvm] 793b4b2 - Revert "AMDGPU: Correct const_index_stride for wave 32 for PAL ABI"
David Stuttard via llvm-commits
llvm-commits at lists.llvm.org
Fri May 7 04:49:42 PDT 2021
Author: David Stuttard
Date: 2021-05-07T12:49:17+01:00
New Revision: 793b4b26039e461dc3142a3f667ba7c97b0ed920
URL: https://github.com/llvm/llvm-project/commit/793b4b26039e461dc3142a3f667ba7c97b0ed920
DIFF: https://github.com/llvm/llvm-project/commit/793b4b26039e461dc3142a3f667ba7c97b0ed920.diff
LOG: Revert "AMDGPU: Correct const_index_stride for wave 32 for PAL ABI"
This reverts commit 442de0c1adf36bfddb5fb66b442bba8999fa733b.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index acb25d0bb951..c8ac34dc1523 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -525,7 +525,6 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
// The pointer to the GIT is formed from the offset passed in and either
// the amdgpu-git-ptr-high function attribute or the top part of the PC
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
- Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
buildGitPtr(MBB, I, DL, TII, Rsrc01);
@@ -547,20 +546,6 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
.addImm(0) // cpol
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
.addMemOperand(MMO);
-
- // The driver will always set the SRD for wave 64 (bits 118:117 of
- // descriptor / bits 22:21 of third sub-reg will be 0b11)
- // If the shader is actually wave32 we have to modify the const_index_stride
- // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
- // reason the driver does this is that there can be cases where it presents
- // 2 shaders with different wave size (e.g. VsFs).
- // TODO: convert to using SCRATCH instructions or multiple SRD buffers
- if (ST.isWave32()) {
- const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
- BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
- .addImm(21)
- .addReg(Rsrc03, RegState::ImplicitDefine);
- }
} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
assert(!ST.isAmdHsaOrMesa(Fn));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
index 4a0f31829ce0..d1b826b70145 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
@@ -3,8 +3,7 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
; Check that it doesn't crash
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX9 %s
target datalayout = "A5"
@@ -14,8 +13,8 @@ define amdgpu_cs void @test_simple_indirect_call() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_getpc_b64 s[36:37]
; GFX9-NEXT: s_mov_b32 s36, s0
-; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x10
; GFX9-NEXT: s_getpc_b64 s[4:5]
+; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x10
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_add_u32 s36, s36, s0
@@ -24,23 +23,6 @@ define amdgpu_cs void @test_simple_indirect_call() {
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
-;
-; GFX10-LABEL: test_simple_indirect_call:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_getpc_b64 s[36:37]
-; GFX10-NEXT: s_mov_b32 s36, s0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x10
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_bitset0_b32 s39, 21
-; GFX10-NEXT: s_mov_b32 s32, 0
-; GFX10-NEXT: s_add_u32 s36, s36, s0
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: s_endpgm
-
%pc = call i64 @llvm.amdgcn.s.getpc()
%fun = inttoptr i64 %pc to void()*
More information about the llvm-commits
mailing list