[llvm] 02046ad - [AMDGPU] W/a for gfx940 byte0 fp8 conversion bug
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 11 02:29:35 PDT 2023
Author: Stanislav Mekhanoshin
Date: 2023-08-11T02:21:21-07:00
New Revision: 02046ad944a51408e6b8db530cb92a3fdc2eb990
URL: https://github.com/llvm/llvm-project/commit/02046ad944a51408e6b8db530cb92a3fdc2eb990
DIFF: https://github.com/llvm/llvm-project/commit/02046ad944a51408e6b8db530cb92a3fdc2eb990.diff
LOG: [AMDGPU] W/a for gfx940 byte0 fp8 conversion bug
The VOP1 forms of these conversions do not work, so the SDWA forms are selected for byte 0 instead.
Differential Revision: https://reviews.llvm.org/D157683
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/VOP1Instructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 63044b08f48300..facaf8fd7145b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1862,6 +1862,9 @@ def HasGDS : Predicate<"Subtarget->hasGDS()">;
def HasGWS : Predicate<"Subtarget->hasGWS()">;
+def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
+def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index e1a4a1e76f84ae..27438e9a9fe358 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1172,6 +1172,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true if the target supports the pre-NGG legacy geometry path.
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
+ // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
+ bool hasCvtFP8VOP1Bug() const { return true; }
+
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1a8efc6e3df200..6275daee144263 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -584,18 +584,28 @@ let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
}
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
- VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
+ VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
(f32 (node i32:$src, index)),
- !if (index,
- (inst_sdwa 0, $src, 0, 0, index),
- (inst_e32 $src))
+ (inst_sdwa 0, $src, 0, 0, index)
>;
-foreach Index = [0, 1, 2, 3] in {
- def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
- V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
- def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
- V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
+let OtherPredicates = [HasCvtFP8VOP1Bug] in {
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
+ (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
+ (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
+}
+
+let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
+ (V_CVT_F32_FP8_e32 $src)>;
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
+ (V_CVT_F32_BF8_e32 $src)>;
+}
+
+foreach Index = [1, 2, 3] in {
+ def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
+ def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index 6261a08bc61919..98b7e8cef0da85 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -10,7 +10,7 @@ declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte0:
-; GCN: v_cvt_f32_bf8_e32 v0, v0{{$}}
+; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0{{$}}
define float @test_cvt_f32_bf8_byte0(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
ret float %ret
@@ -38,7 +38,7 @@ define float @test_cvt_f32_bf8_byte3(i32 %a) {
}
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte0:
-; GCN: v_cvt_f32_fp8_e32 v0, v0{{$}}
+; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0{{$}}
define float @test_cvt_f32_fp8_byte0(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
ret float %ret
More information about the llvm-commits
mailing list