[llvm] [AMDGPU] Don't form sext/abs/neg fp8 cvt (PR #83843)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 5 23:24:31 PST 2024


https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/83843

>From 596979a7909bb57e9949a47e21ba8ce11e4dc295 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 4 Mar 2024 14:38:07 +0100
Subject: [PATCH 1/3] [AMDGPU] Don't form sext/abs/neg fp8 cvt on gfx940

Verifier should have also caught this, I will add that in a separate patch.

Fixes SWDEV-447468
---
 llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp     |  9 ++
 .../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll     | 96 +++++++++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index afc380b4203457..1fadd8ce45b1f5 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -338,6 +338,15 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
 }
 
 bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
+  switch (MI.getOpcode()) {
+  case AMDGPU::V_CVT_F32_FP8_sdwa:
+  case AMDGPU::V_CVT_F32_BF8_sdwa:
+  case AMDGPU::V_CVT_PK_F32_FP8_sdwa:
+  case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
+    // Does not support input modifiers: noabs, noneg, nosext.
+    return false;
+  }
+
   // Find operand in instruction that matches source operand and replace it with
   // target operand. Set corresponding src_sel
   bool IsPreserveSrc = false;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index fc4b663b85a61b..9b8fdf90170458 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -534,3 +534,99 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
   %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 3)
   ret i32 %ret
 }
+
+define float @test_sext_cvt_f32_fp8(i16 %a) {
+; GFX940-LABEL: test_sext_cvt_f32_fp8:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX940-NEXT:    v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: test_sext_cvt_f32_fp8:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %a.sext = sext i16 %a to i32
+  %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a.sext, i32 1)
+  ret float %ret
+}
+
+define float @test_sext_cvt_f32_bf8(i16 %a) {
+; GFX940-LABEL: test_sext_cvt_f32_bf8:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX940-NEXT:    v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: test_sext_cvt_f32_bf8:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %a.sext = sext i16 %a to i32
+  %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a.sext, i32 1)
+  ret float %ret
+}
+
+define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
+; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX940-NEXT:    v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %a.sext = sext i16 %a to i32
+  %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a.sext, i1 true)
+  ret <2 x float> %ret
+}
+
+define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
+; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX940-NEXT:    v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %a.sext = sext i16 %a to i32
+  %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a.sext, i1 false)
+  ret <2 x float> %ret
+}

>From 55b8a4d37161eea1b0db7847cf9fcbf4adca6fb9 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 4 Mar 2024 14:46:14 +0100
Subject: [PATCH 2/3] add verifier

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 31ced9d41e15e2..a5b6c582a78cae 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4707,6 +4707,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
       }
     }
 
+    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
+        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
+        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
+        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
+      const MachineOperand *Src0ModsMO =
+          getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
+      unsigned Mods = Src0ModsMO->getImm();
+      if (Mods & SISrcMods::ABS || Mods & SISrcMods::NEG ||
+          Mods & SISrcMods::SEXT) {
+        ErrInfo = "sext, abs and neg are not allowed on this instruction";
+        return false;
+      }
+    }
+
     uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
     if (isVOPC(BasicOpcode)) {
       if (!ST.hasSDWASdst() && DstIdx != -1) {

>From ea62559d2ea7cc8db28413ff2c15d73e7d4ad1e8 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 6 Mar 2024 08:24:20 +0100
Subject: [PATCH 3/3] add verifier test

---
 .../test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir

diff --git a/llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir b/llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir
new file mode 100644
index 00000000000000..2066637a34af0e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir
@@ -0,0 +1,38 @@
+# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx940 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code"  %s
+
+# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
+# CHECK: $vgpr0 = V_CVT_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
+# CHECK: $vgpr0 = V_CVT_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
+# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
+# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
+# CHECK: $vgpr0 = V_CVT_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
+# CHECK: $vgpr0 = V_CVT_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
+# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
+# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+
+---
+name:           test
+liveins:
+body:            |
+  bb.0:
+    liveins: $vgpr0, $vgpr0_vgpr1
+
+    ; sext/neg
+    $vgpr0 = V_CVT_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+    $vgpr0 = V_CVT_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+    $vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+    $vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+
+    ; abs
+    $vgpr0 = V_CVT_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+    $vgpr0 = V_CVT_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+    $vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+    $vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
+...



More information about the llvm-commits mailing list