[llvm] ff85d61 - Update *_TMPRING_SIZE.WAVESIZE for GFX11
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 10 10:52:03 PDT 2022
Author: Jay Foad
Date: 2022-06-10T13:24:00-04:00
New Revision: ff85d61a6e1844067afc6f82962cc1c81cd30606
URL: https://github.com/llvm/llvm-project/commit/ff85d61a6e1844067afc6f82962cc1c81cd30606
DIFF: https://github.com/llvm/llvm-project/commit/ff85d61a6e1844067afc6f82962cc1c81cd30606.diff
LOG: Update *_TMPRING_SIZE.WAVESIZE for GFX11
The encoding of COMPUTE_TMPRING_SIZE.WAVESIZE and
SPI_TMPRING_SIZE.WAVESIZE has changed in GFX11: it is now in units
of 64 dwords instead of 256 dwords, and the field has been widened
from 13 bits to 15 bits.
Depends on D126989
Reviewed By: rampitec, arsenm, #amdgpu
Differential Revision: https://reviews.llvm.org/D127248
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIDefines.h
llvm/test/CodeGen/AMDGPU/mesa3d.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index c7a80ca41b992..1366aabe2c8c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -695,7 +695,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
const uint64_t MaxScratchPerWorkitem =
- GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
+ STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
ProgInfo.ScratchSize,
@@ -879,15 +879,14 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.LDSBlocks =
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
- // Scratch is allocated in 256 dword blocks.
- unsigned ScratchAlignShift = 10;
+ // Scratch is allocated in 64-dword or 256-dword blocks.
+ unsigned ScratchAlignShift =
+ STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
// We need to program the hardware with the amount of scratch memory that
// is used by the entire wave. ProgInfo.ScratchSize is the amount of
// scratch memory used per thread.
- ProgInfo.ScratchBlocks =
- alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
- 1ULL << ScratchAlignShift) >>
- ScratchAlignShift;
+ ProgInfo.ScratchBlocks = divideCeil(
+ ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
@@ -946,6 +945,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
@@ -957,7 +957,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
- OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks));
+ OutStreamer->emitInt32(
+ STM.getGeneration() >= AMDGPUSubtarget::GFX11
+ ? S_00B860_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
+ : S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
// TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
// 0" comment but I don't see a corresponding field in the register spec.
@@ -966,8 +969,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);
- OutStreamer->emitIntValue(
- S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
+ OutStreamer->emitInt32(
+ STM.getGeneration() >= AMDGPUSubtarget::GFX11
+ ? S_0286E8_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
+ : S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
}
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 0f4242de80f03..a7102351ae197 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -201,9 +201,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
SIFrameLowering FrameLowering;
public:
- // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
- static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
-
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM);
~GCNSubtarget() override;
@@ -266,9 +263,19 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return (Generation)Gen;
}
+ unsigned getMaxWaveScratchSize() const {
+ // See COMPUTE_TMPRING_SIZE.WAVESIZE.
+ if (getGeneration() < GFX11) {
+ // 13-bit field in units of 256-dword.
+ return (256 * 4) * ((1 << 13) - 1);
+ }
+ // 15-bit field in units of 64-dword.
+ return (64 * 4) * ((1 << 15) - 1);
+ }
+
/// Return the number of high bits known to be zero for a frame index.
unsigned getKnownHighZeroBitsForFrameIndex() const {
- return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
+ return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
}
int getLDSBankCount() const {
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index e1a352ca37dec..349a3fa649cf1 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -1036,10 +1036,12 @@ enum Offset_COV5 : unsigned {
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
-#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+#define S_00B860_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12)
+#define S_00B860_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12)
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
-#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+#define S_0286E8_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12)
+#define S_0286E8_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12)
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
#define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
diff --git a/llvm/test/CodeGen/AMDGPU/mesa3d.ll b/llvm/test/CodeGen/AMDGPU/mesa3d.ll
index 20d46e1d2a656..fc8ffb31cda69 100644
--- a/llvm/test/CodeGen/AMDGPU/mesa3d.ll
+++ b/llvm/test/CodeGen/AMDGPU/mesa3d.ll
@@ -1,10 +1,15 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
-; Check SPI_TMPRING_SIZE.WAVESIZE = 5
+; SPI_TMPRING_SIZE.WAVESIZE = 5
; GFX10: .long 165608
; GFX10-NEXT: .long 20480
+; SPI_TMPRING_SIZE.WAVESIZE = 17
+; GFX11: .long 165608
+; GFX11-NEXT: .long 69632
+
; GCN-LABEL: {{^}}scratch_ps:
; GCN: s_load_dwordx2 s[4:5], s[0:1], 0x0{{$}}
; GCN-DAG: s_mov_b32 s6, -1{{$}}
More information about the llvm-commits
mailing list