[llvm] r369816 - [AMDGPU] w/a for gfx908 mfma SrcC literal HW bug
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 15:09:58 PDT 2019
Author: rampitec
Date: Fri Aug 23 15:09:58 2019
New Revision: 369816
URL: http://llvm.org/viewvc/llvm-project?rev=369816&view=rev
Log:
[AMDGPU] w/a for gfx908 mfma SrcC literal HW bug
gfx908 ignores an mfma if SrcC is an inline literal, so on subtargets with this bug the constant is no longer folded into SrcC.
Differential Revision: https://reviews.llvm.org/D66670
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=369816&r1=369815&r2=369816&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Fri Aug 23 15:09:58 2019
@@ -154,6 +154,12 @@ def FeatureLdsMisalignedBug : SubtargetF
"Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
>;
+def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug",
+ "HasMFMAInlineLiteralBug",
+ "true",
+ "MFMA cannot use inline literal as SrcC"
+>;
+
def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
"HasVcmpxPermlaneHazard",
"true",
@@ -811,6 +817,7 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeaturePkFmacF16Inst,
FeatureAtomicFaddInsts,
FeatureSRAMECC,
+ FeatureMFMAInlineLiteralBug,
FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_9 : FeatureSet<
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=369816&r1=369815&r2=369816&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Fri Aug 23 15:09:58 2019
@@ -262,6 +262,7 @@ GCNSubtarget::GCNSubtarget(const Triple
AddNoCarryInsts(false),
HasUnpackedD16VMem(false),
LDSMisalignedBug(false),
+ HasMFMAInlineLiteralBug(false),
ScalarizeGlobal(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=369816&r1=369815&r2=369816&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Aug 23 15:09:58 2019
@@ -368,6 +368,7 @@ protected:
bool CaymanISA;
bool CFALUBug;
bool LDSMisalignedBug;
+ bool HasMFMAInlineLiteralBug;
bool HasVertexCache;
short TexVTXClauseSize;
bool ScalarizeGlobal;
@@ -987,6 +988,10 @@ public:
return SGPRInitBug;
}
+ bool hasMFMAInlineLiteralBug() const {
+ return HasMFMAInlineLiteralBug;
+ }
+
bool has12DWordStoreHazard() const {
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=369816&r1=369815&r2=369816&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Fri Aug 23 15:09:58 2019
@@ -435,7 +435,8 @@ static bool tryToFoldACImm(const SIInstr
OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST)
return false;
- if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy)) {
+ if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
+ TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
return true;
}
@@ -481,6 +482,9 @@ static bool tryToFoldACImm(const SIInstr
return false; // Can only fold splat constants
}
+ if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
+ return false;
+
FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
return true;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=369816&r1=369815&r2=369816&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Fri Aug 23 15:09:58 2019
@@ -61,6 +61,7 @@ static cl::opt<bool> EnableSpillSGPRToVG
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
AMDGPURegisterInfo(),
+ ST(ST),
SGPRPressureSets(getNumRegPressureSets()),
VGPRPressureSets(getNumRegPressureSets()),
AGPRPressureSets(getNumRegPressureSets()),
@@ -1582,6 +1583,15 @@ const TargetRegisterClass *SIRegisterInf
}
}
+bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
+ if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
+ return !ST.hasMFMAInlineLiteralBug();
+
+ return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
+ OpType <= AMDGPU::OPERAND_SRC_LAST;
+}
+
bool SIRegisterInfo::shouldRewriteCopySrc(
const TargetRegisterClass *DefRC,
unsigned DefSubReg,
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=369816&r1=369815&r2=369816&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Fri Aug 23 15:09:58 2019
@@ -27,6 +27,7 @@ class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPURegisterInfo {
private:
+ const GCNSubtarget &ST;
unsigned SGPRSetID;
unsigned VGPRSetID;
unsigned AGPRSetID;
@@ -193,10 +194,7 @@ public:
/// \returns True if operands defined with this operand type can accept
/// an inline constant. i.e. An integer value in the range (-16, 64) or
/// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
- bool opCanUseInlineConstant(unsigned OpType) const {
- return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
- OpType <= AMDGPU::OPERAND_SRC_LAST;
- }
+ bool opCanUseInlineConstant(unsigned OpType) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC,
Modified: llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll?rev=369816&r1=369815&r2=369816&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll Fri Aug 23 15:09:58 2019
@@ -3,7 +3,7 @@
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
; GCN-LABEL: {{^}}test_32_agprs:
-; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}}, 0
+; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}},
; GCN-NOT: v28
; GCN: NumVgprs: 32
; GCN: VGPRBlocks: 7
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll?rev=369816&r1=369815&r2=369816&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll Fri Aug 23 15:09:58 2019
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,NOLIT-SRCC %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,LIT-SRCC %s
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
@@ -993,7 +994,12 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_imm_splat:
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
-; GCN: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0
+; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
+; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
+; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
+; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
+; NOLIT-SRCC: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}]
+; LIT-SRCC: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0
; GCN: v_accvgpr_read_b32
; GCN: v_accvgpr_read_b32
; GCN: v_accvgpr_read_b32
@@ -1009,7 +1015,9 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_16x16x1f32_imm_splat:
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
-; GCN: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0
+; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
+; NOLIT-SRCC: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}]
+; LIT-SRCC: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0
; GCN-DAG: v_accvgpr_read_b32
; GCN-DAG: v_accvgpr_read_b32
; GCN-DAG: v_accvgpr_read_b32
@@ -1040,7 +1048,9 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x8f16_imm_splat:
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 0x40004000
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 0x3c003c00
-; GCN: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], 1.0
+; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
+; NOLIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9:]+}}]
+; LIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], 1.0
; GCN-DAG: v_accvgpr_read_b32
; GCN-DAG: v_accvgpr_read_b32
; GCN-DAG: v_accvgpr_read_b32
@@ -1071,7 +1081,9 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_imm_splat:
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
-; GCN: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 0
+; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0
+; NOLIT-SRCC: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}]
+; LIT-SRCC: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 0
; GCN-DAG: v_accvgpr_read_b32
; GCN-DAG: v_accvgpr_read_b32
; GCN-DAG: v_accvgpr_read_b32
More information about the llvm-commits mailing list