[PATCH] D157683: [AMDGPU] W/a for gfx940 byte0 fp8 conversion bug

Fri Aug 11 02:29:39 PDT 2023

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG02046ad944a5: [AMDGPU] W/a for gfx940 byte0 fp8 conversion bug (authored by rampitec).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157683/new/

https://reviews.llvm.org/D157683

Files:
  llvm/lib/Target/AMDGPU/AMDGPU.td
  llvm/lib/Target/AMDGPU/GCNSubtarget.h
  llvm/lib/Target/AMDGPU/VOP1Instructions.td
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -10,7 +10,7 @@
 declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
 
 ; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte0:
-; GCN: v_cvt_f32_bf8_e32 v0, v0{{$}}
+; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0{{$}}
 define float @test_cvt_f32_bf8_byte0(i32 %a) {
   %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
   ret float %ret
@@ -38,7 +38,7 @@
 }
 
 ; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte0:
-; GCN: v_cvt_f32_fp8_e32 v0, v0{{$}}
+; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0{{$}}
 define float @test_cvt_f32_fp8_byte0(i32 %a) {
   %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
   ret float %ret
Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -584,18 +584,28 @@
 }
 
 class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
-    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
+    VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
     (f32 (node i32:$src, index)),
-    !if (index,
-         (inst_sdwa 0, $src, 0, 0, index),
-         (inst_e32 $src))
+    (inst_sdwa 0, $src, 0, 0, index)
 >;
 
-foreach Index = [0, 1, 2, 3] in {
-  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
-                       V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
-  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
-                       V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
+let OtherPredicates = [HasCvtFP8VOP1Bug] in {
+  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
+               (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
+  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
+               (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
+}
+
+let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
+  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
+               (V_CVT_F32_FP8_e32 $src)>;
+  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
+               (V_CVT_F32_BF8_e32 $src)>;
+}
+
+foreach Index = [1, 2, 3] in {
+  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
+  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
 }
 
 class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1172,6 +1172,9 @@
   // \returns true if the target supports the pre-NGG legacy geometry path.
   bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
 
+  // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
+  bool hasCvtFP8VOP1Bug() const { return true; }
+
   /// \returns SGPR allocation granularity supported by the subtarget.
   unsigned getSGPRAllocGranule() const {
     return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
Index: llvm/lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPU.td
+++ llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1862,6 +1862,9 @@
 
 def HasGWS : Predicate<"Subtarget->hasGWS()">;
 
+def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
+def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
+
 // Include AMDGPU TD files
 include "SISchedule.td"
 include "GCNProcessors.td"


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D157683.549307.patch
Type: text/x-patch
Size: 3651 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230811/cd4e2050/attachment.bin>