[llvm] AMDGPU: Fix incorrectly selecting fp8/bf8 conversion intrinsics (PR #107291)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 4 12:15:38 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Trying to codegen these on targets without the instructions should
fail to select. Not sure if all the predicates are correct. We had
a fake one disconnected to a feature which was always true.
Fixes: SWDEV-482274
---
Full diff: https://github.com/llvm/llvm-project/pull/107291.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+9-2)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+1)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+7-4)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f32.fp8.err.ll (+100)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5757ac0d4454d0..248e3ba3e32c56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -685,6 +685,13 @@ def FeatureFP8ConversionInsts : SubtargetFeature<"fp8-conversion-insts",
"Has fp8 and bf8 conversion instructions"
>;
+def FeatureCvtFP8VOP1Bug : SubtargetFeature<"cvt-fp8-vop1-bug",
+ "HasCvtFP8Vop1Bug",
+ "true",
+ "FP8/BF8 VOP1 form of conversion to F32 is unreliable",
+ [FeatureFP8ConversionInsts]
+>;
+
def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"HasPkFmacF16Inst",
"true",
@@ -1444,7 +1451,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeaturePackedFP32Ops,
FeatureMAIInsts,
FeatureFP8Insts,
- FeatureFP8ConversionInsts,
+ FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug,
FeaturePkFmacF16Inst,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
@@ -1657,7 +1664,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureFlatAtomicFaddF32Inst,
FeatureImageInsts,
FeatureExtendedImageInsts,
- FeatureFP8ConversionInsts,
+ FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureSALUFloatInsts,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 7b74eab96c5677..8a909ad7be26f6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -158,6 +158,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
+ bool HasCvtFP8Vop1Bug = false;
bool HasPkFmacF16Inst = false;
bool HasAtomicFMinFMaxF32GlobalInsts = false;
bool HasAtomicFMinFMaxF64GlobalInsts = false;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 03e4cb9fcf49b7..bc86dc7b94dec7 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -599,8 +599,8 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
(inst_sdwa 0, $src, 0, 0, index)
>;
-let SubtargetPredicate = isGFX9Only in {
-let OtherPredicates = [HasCvtFP8VOP1Bug] in {
+let SubtargetPredicate = HasFP8ConversionInsts in {
+let OtherPredicates = [HasCvtFP8VOP1Bug, HasSDWA] in {
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
@@ -614,11 +614,14 @@ let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
(V_CVT_F32_BF8_e32 $src)>;
}
+let OtherPredicates = [HasSDWA] in {
foreach Index = [1, 2, 3] in {
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
-} // End SubtargetPredicate = isGFX9Only
+} // End OtherPredicates = [HasSDWA]
+
+} // End SubtargetPredicate = HasFP8ConversionInsts
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
@@ -628,7 +631,7 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
(inst_e32 $src))
>;
-let SubtargetPredicate = isGFX9Only in {
+let SubtargetPredicate = HasFP8ConversionInsts, OtherPredicates = [HasSDWA] in {
foreach Index = [0, -1] in {
def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f32.fp8.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f32.fp8.err.ll
new file mode 100644
index 00000000000000..29812993d541e2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f32.fp8.err.ll
@@ -0,0 +1,100 @@
+; RUN: split-file %s %t
+
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s
+
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s
+
+
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s
+
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s
+
+
+
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s
+
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s
+
+
+;--- fp8-byte0-err.ll
+; ERR-FP8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8
+; ERR-FP8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0
+
+define float @test_cvt_f32_fp8_byte0(i32 %a) {
+ %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
+ ret float %ret
+}
+
+;--- fp8-byte1-err.ll
+; ERR-FP8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8
+; ERR-FP8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1
+define float @test_cvt_f32_fp8_byte1(i32 %a) {
+ %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
+ ret float %ret
+}
+
+;--- bf8-byte0-err.ll
+; ERR-BF8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8
+; ERR-BF8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0
+define float @test_cvt_f32_bf8_byte0(i32 %a) {
+ %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
+ ret float %ret
+}
+
+;--- bf8-byte1-err.ll
+; ERR-BF8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8
+; ERR-BF8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1
+define float @test_cvt_f32_bf8_byte1(i32 %a) {
+ %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
+ ret float %ret
+}
+
+;--- pk-fp8-word0-err.ll
+; ERR-PK-FP8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8
+; ERR-PK-FP8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0
+define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
+ %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
+ ret <2 x float> %ret
+}
+
+;--- pk-fp8-word1-err.ll
+; ERR-PK-FP8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8
+; ERR-PK-FP8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1
+define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
+ %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
+ ret <2 x float> %ret
+}
+
+;--- pk-bf8-word0-err.ll
+; ERR-PK-BF8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8
+; ERR-PK-BF8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0
+define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
+ %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
+ ret <2 x float> %ret
+}
+
+;--- pk-bf8-word1-err.ll
+; ERR-PK-BF8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8
+; ERR-PK-BF8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1
+define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
+ %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
+ ret <2 x float> %ret
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/107291
More information about the llvm-commits
mailing list