[PATCH] D157683: [AMDGPU] Workaround for gfx940 byte0 fp8 conversion bug
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 11 02:18:27 PDT 2023
rampitec updated this revision to Diff 549301.
rampitec marked 2 inline comments as done.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D157683/new/
https://reviews.llvm.org/D157683
Files:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/VOP1Instructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -10,7 +10,7 @@
declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte0:
-; GCN: v_cvt_f32_bf8_e32 v0, v0{{$}}
+; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0{{$}}
define float @test_cvt_f32_bf8_byte0(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
ret float %ret
@@ -38,7 +38,7 @@
}
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte0:
-; GCN: v_cvt_f32_fp8_e32 v0, v0{{$}}
+; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0{{$}}
define float @test_cvt_f32_fp8_byte0(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
ret float %ret
Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -584,18 +584,28 @@
}
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
- VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
+ VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
(f32 (node i32:$src, index)),
- !if (index,
- (inst_sdwa 0, $src, 0, 0, index),
- (inst_e32 $src))
+ (inst_sdwa 0, $src, 0, 0, index)
>;
-foreach Index = [0, 1, 2, 3] in {
- def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
- V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
- def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
- V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
+let OtherPredicates = [HasCvtFP8VOP1Bug] in {
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
+ (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
+ (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
+}
+
+let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
+ (V_CVT_F32_FP8_e32 $src)>;
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
+ (V_CVT_F32_BF8_e32 $src)>;
+}
+
+foreach Index = [1, 2, 3] in {
+ def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
+ def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1172,6 +1172,9 @@
// \returns true if the target supports the pre-NGG legacy geometry path.
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
+ // \returns true if the VOP1 form of the FP8/BF8 conversion to F32 is unreliable.
+ bool hasCvtFP8VOP1Bug() const { return true; }
+
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
Index: llvm/lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPU.td
+++ llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1862,6 +1862,9 @@
def HasGWS : Predicate<"Subtarget->hasGWS()">;
+def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
+def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D157683.549301.patch
Type: text/x-patch
Size: 3651 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230811/c6647465/attachment.bin>
More information about the llvm-commits
mailing list