[llvm] [AMDGPU] Handle hazard in v_scalef32_sr_fp4_* conversions (PR #118589)

Pravin Jagtap via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 6 20:35:40 PST 2024


https://github.com/pravinjagtap updated https://github.com/llvm/llvm-project/pull/118589

>From 085035ee9d31f938e343e1f469e06bc492b8f5f9 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Wed, 4 Dec 2024 10:10:46 +0530
Subject: [PATCH 1/3] [AMDGPU] Handle hazard in v_scalef32_sr_fp4_* conversions
 of gfx950.

Presently, the compiler selectively adds a nop when opsel != 0, i.e.
only when partially writing to high bytes. Experiments in
SWDEV-499733 and SWDEV-501347 suggest that we need a nop for
the above cases irrespective of opsel values.

Note: We might need to add a few others to the same table.
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 38 +++++++++++--------
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         | 10 +++++
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 12 ++++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  3 ++
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |  4 +-
 llvm/lib/Target/AMDGPU/VOPInstructions.td     |  1 +
 llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir   | 15 +++++---
 7 files changed, 60 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index ecf03b14143ee3..08b75112f99834 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -916,21 +916,29 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
   if (SIInstrInfo::isSDWA(MI)) {
     // Type 1: SDWA with dst_sel != DWORD
     if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
-      if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
-        return nullptr;
-  } else {
-    // Type 2 && Type 3: (VOP3 which write the hi bits) || (FP8DstSelInst
-    // with op_sel[3:2] != 0)
-    if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel) ||
-        !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
-              SISrcMods::DST_OP_SEL ||
-          (AMDGPU::isFP8DstSelInst(Opcode) &&
-           (TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
-            SISrcMods::OP_SEL_0))))
-      return nullptr;
-  }
-
-  return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+      if (DstSel->getImm() != AMDGPU::SDWA::DWORD)
+        return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+  }
+
+  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel)) {
+    // Type 2: VOP3 which write the hi bits
+    if (TII->getNamedImmOperand(MI, AMDGPU::OpName::src0_modifiers) &
+        SISrcMods::DST_OP_SEL)
+      return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+    // Type 3: FP8DstSelInst with op_sel[3:2] != 0)
+    if (AMDGPU::isFP8DstSelInst(Opcode) &&
+        (TII->getNamedImmOperand(MI, AMDGPU::OpName::src2_modifiers) &
+         SISrcMods::OP_SEL_0))
+      return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+  }
+
+  // Special case: nop is required for all the opsel values for fp4 sr variant
+  // cvt scale instructions
+  if (AMDGPU::isFP4DstSelInst(Opcode))
+    return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+  return nullptr;
 }
 
 /// Checks whether the provided \p MI "consumes" the operand with a Dest sel
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 7bc6db4cec1065..1742dcf112bf8a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2567,6 +2567,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field bit IsFP8SrcByteSel = 0;
   field bit IsFP8DstByteSel = 0;
   field bit HasFP8DstByteSel = 0;
+  field bit HasFP4DstByteSel = 0;
   field bit IsFP8ByteSel = !or(IsFP8SrcByteSel, IsFP8DstByteSel);
 
   field bit HasDst = !ne(DstVT.Value, untyped.Value);
@@ -3258,6 +3259,15 @@ def FP8DstByteSelTable : GenericTable {
   let PrimaryKeyName = "getFP8DstByteSelHelper";
 }
 
+def FP4DstByteSelTable : GenericTable {
+  let FilterClass = "VOP3_Pseudo";
+  let CppTypeName = "FP4DstByteSelInfo";
+  let Fields = ["Opcode", "HasFP4DstByteSel"];
+
+  let PrimaryKey = ["Opcode"];
+  let PrimaryKeyName = "getFP4DstByteSelHelper";
+}
+
 def VOPDComponentTable : GenericTable {
   let FilterClass = "VOPD_Component";
   let CppTypeName = "VOPDComponentInfo";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5a0e812748fbb7..bbb329746fc5ed 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -380,6 +380,8 @@ struct VOPTrue16Info {
 
 #define GET_FP8DstByteSelTable_DECL
 #define GET_FP8DstByteSelTable_IMPL
+#define GET_FP4DstByteSelTable_DECL
+#define GET_FP4DstByteSelTable_IMPL
 
 struct DPMACCInstructionInfo {
   uint16_t Opcode;
@@ -391,6 +393,11 @@ struct FP8DstByteSelInfo {
   bool HasFP8DstByteSel;
 };
 
+struct FP4DstByteSelInfo {
+  uint16_t Opcode;
+  bool HasFP4DstByteSel;
+};
+
 #define GET_FP8DstByteSelTable_DECL
 #define GET_FP8DstByteSelTable_IMPL
 #define GET_MTBUFInfoTable_DECL
@@ -662,6 +669,11 @@ bool isFP8DstSelInst(unsigned Opc) {
   return Info ? Info->HasFP8DstByteSel : false;
 }
 
+bool isFP4DstSelInst(unsigned Opc) {
+  const FP4DstByteSelInfo *Info = getFP4DstByteSelHelper(Opc);
+  return Info ? Info->HasFP4DstByteSel : false;
+}
+
 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
   return Info ? Info->Opcode3Addr : ~0u;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index ea497d7b239d7e..7648f52fa4a7a5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -887,6 +887,9 @@ bool isTrue16Inst(unsigned Opc);
 LLVM_READONLY
 bool isFP8DstSelInst(unsigned Opc);
 
+LLVM_READONLY
+bool isFP4DstSelInst(unsigned Opc);
+
 LLVM_READONLY
 bool isInvalidSingleUseConsumerInst(unsigned Opc);
 
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 47b60bb0fdab30..7566cca4a295c6 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1014,7 +1014,7 @@ class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
   let HasExtVOP3DPP = 0;
   let HasOpSel = 1;
   let HasOMod = 0;
-  let HasFP8DstByteSel = 1;
+  let HasFP4DstByteSel = 1;
 }
 
 def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
@@ -1026,7 +1026,7 @@ def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32
   let HasExtVOP3DPP = 0;
   let HasOpSel = 1;
   let HasOMod = 0;
-  let HasFP8DstByteSel = 1;
+  let HasFP4DstByteSel = 1;
 }
 
 class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 0e19696a32f86f..c38ec3ba897270 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -110,6 +110,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
   let IsSWMMAC = P.IsSWMMAC;
 
   bit HasFP8DstByteSel = P.HasFP8DstByteSel;
+  bit HasFP4DstByteSel = P.HasFP4DstByteSel;
 
   let AsmOperands = !if(!and(!not(P.IsTrue16), isVop3OpSel),
                         P.AsmVOP3OpSel,
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
index 1bbad901d16b2c..49576433ab54da 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
@@ -642,17 +642,18 @@ body:             |
 ...
 
 ---
-name:            test_scalef32_sr_pk_fp4_bf16_neg_opsel0_hazard
+name:            test_scalef32_sr_pk_fp4_bf16_opsel0_hazard
 body:             |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
-    ; GCN-LABEL: name: test_scalef32_sr_pk_fp4_bf16_neg_opsel0_hazard
+    ; GCN-LABEL: name: test_scalef32_sr_pk_fp4_bf16_opsel0_hazard
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: S_WAITCNT 0
     ; GCN-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
     ; GCN-NEXT: S_WAITCNT 3952
     ; GCN-NEXT: early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_BF16_e64 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, killed $vgpr0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr1, $vgpr1, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
     S_WAITCNT 0
@@ -731,17 +732,18 @@ body:             |
 ...
 
 ---
-name:            test_scalef32_sr_pk_fp4_f32_neg_opsel0_hazard
+name:            test_scalef32_sr_pk_fp4_f32_opsel0_hazard
 body:             |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-    ; GCN-LABEL: name: test_scalef32_sr_pk_fp4_f32_neg_opsel0_hazard
+    ; GCN-LABEL: name: test_scalef32_sr_pk_fp4_f32_opsel0_hazard
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: S_WAITCNT 0
     ; GCN-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
     ; GCN-NEXT: S_WAITCNT 3952
     ; GCN-NEXT: early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_F32_e64 0, killed $vgpr2_vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, killed $vgpr0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr1, $vgpr1, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
     S_WAITCNT 0
@@ -1119,17 +1121,18 @@ body:             |
 ...
 
 ---
-name:            test_cvt_scale_cvt_scalef32_sr_pk_fp4_f16_neg_opsel0_hazard
+name:            test_cvt_scale_cvt_scalef32_sr_pk_fp4_f16_opsel0_hazard
 body:             |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-    ; GCN-LABEL: name: test_cvt_scale_cvt_scalef32_sr_pk_fp4_f16_neg_opsel0_hazard
+    ; GCN-LABEL: name: test_cvt_scale_cvt_scalef32_sr_pk_fp4_f16_opsel0_hazard
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: S_WAITCNT 0
     ; GCN-NEXT: renamable $vgpr2 = V_CVT_SCALEF32_PK_FP4_F16_e64 8, $vgpr0, 0, $vgpr1, 4, killed $vgpr2, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: early-clobber renamable $vgpr4 = V_CVT_SCALEF32_SR_PK_FP4_F16_e64 0, killed $vgpr0, 0, killed $vgpr3, 0, killed $vgpr1, killed $vgpr2, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
     S_WAITCNT 0

>From b34afa8c579952025f54309a39a08cf017b3538b Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Fri, 6 Dec 2024 11:07:27 +0530
Subject: [PATCH 2/3] Combined the fp4 and fp8 queries in getFPDstSelType.

---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp  |  5 +++--
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 16 +++++++++-------
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h   |  7 +++----
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 08b75112f99834..5207201e14c091 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -920,6 +920,7 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
         return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
   }
 
+  AMDGPU::FPType IsFP4OrFP8ConvOpc = AMDGPU::getFPDstSelType(Opcode);
   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel)) {
     // Type 2: VOP3 which write the hi bits
     if (TII->getNamedImmOperand(MI, AMDGPU::OpName::src0_modifiers) &
@@ -927,7 +928,7 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
       return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
 
     // Type 3: FP8DstSelInst with op_sel[3:2] != 0)
-    if (AMDGPU::isFP8DstSelInst(Opcode) &&
+    if (IsFP4OrFP8ConvOpc == AMDGPU::FPType::FP8 &&
         (TII->getNamedImmOperand(MI, AMDGPU::OpName::src2_modifiers) &
          SISrcMods::OP_SEL_0))
       return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
@@ -935,7 +936,7 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
 
   // Special case: nop is required for all the opsel values for fp4 sr variant
   // cvt scale instructions
-  if (AMDGPU::isFP4DstSelInst(Opcode))
+  if (IsFP4OrFP8ConvOpc == AMDGPU::FPType::FP4)
     return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
 
   return nullptr;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index bbb329746fc5ed..ae22e3d7e91572 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -664,14 +664,16 @@ bool isTrue16Inst(unsigned Opc) {
   return Info ? Info->IsTrue16 : false;
 }
 
-bool isFP8DstSelInst(unsigned Opc) {
-  const FP8DstByteSelInfo *Info = getFP8DstByteSelHelper(Opc);
-  return Info ? Info->HasFP8DstByteSel : false;
-}
+FPType getFPDstSelType(unsigned Opc) {
+  const FP8DstByteSelInfo *Info8 = getFP8DstByteSelHelper(Opc);
+  if (Info8 && Info8->HasFP8DstByteSel)
+    return FPType::FP8;
+
+  const FP4DstByteSelInfo *Info4 = getFP4DstByteSelHelper(Opc);
+  if (Info4 && Info4->HasFP4DstByteSel)
+    return FPType::FP4;
 
-bool isFP4DstSelInst(unsigned Opc) {
-  const FP4DstByteSelInfo *Info = getFP4DstByteSelHelper(Opc);
-  return Info ? Info->HasFP4DstByteSel : false;
+  return FPType::None;
 }
 
 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 7648f52fa4a7a5..29f64d0db8dd2e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -55,6 +55,8 @@ static constexpr unsigned GFX12 = 1;
 
 enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
 
+enum class FPType { None, FP4, FP8 };
+
 /// \returns True if \p STI is AMDHSA.
 bool isHsaAbi(const MCSubtargetInfo &STI);
 
@@ -885,10 +887,7 @@ LLVM_READONLY
 bool isTrue16Inst(unsigned Opc);
 
 LLVM_READONLY
-bool isFP8DstSelInst(unsigned Opc);
-
-LLVM_READONLY
-bool isFP4DstSelInst(unsigned Opc);
+FPType getFPDstSelType(unsigned Opc);
 
 LLVM_READONLY
 bool isInvalidSingleUseConsumerInst(unsigned Opc);

>From 3cfa49c3616c7da5fb53148bbcc4d18bc10795f3 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Sat, 7 Dec 2024 09:53:26 +0530
Subject: [PATCH 3/3] Merged two search tables of fp4 and fp8

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         | 17 ++++---------
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 24 +++++++------------
 2 files changed, 12 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1742dcf112bf8a..bb78e77a9dc1a6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -3250,22 +3250,13 @@ def isMFMA_F8F6F4Table : GenericTable {
   let PrimaryKeyName = "isMFMA_F8F6F4" ;
 }
 
-def FP8DstByteSelTable : GenericTable {
+def FP4FP8DstByteSelTable : GenericTable {
   let FilterClass = "VOP3_Pseudo";
-  let CppTypeName = "FP8DstByteSelInfo";
-  let Fields = ["Opcode", "HasFP8DstByteSel"];
+  let CppTypeName = "FP4FP8DstByteSelInfo";
+  let Fields = ["Opcode", "HasFP8DstByteSel", "HasFP4DstByteSel"];
 
   let PrimaryKey = ["Opcode"];
-  let PrimaryKeyName = "getFP8DstByteSelHelper";
-}
-
-def FP4DstByteSelTable : GenericTable {
-  let FilterClass = "VOP3_Pseudo";
-  let CppTypeName = "FP4DstByteSelInfo";
-  let Fields = ["Opcode", "HasFP4DstByteSel"];
-
-  let PrimaryKey = ["Opcode"];
-  let PrimaryKeyName = "getFP4DstByteSelHelper";
+  let PrimaryKeyName = "getFP4FP8DstByteSelHelper";
 }
 
 def VOPDComponentTable : GenericTable {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index ae22e3d7e91572..7c7bafd93c89db 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -378,23 +378,17 @@ struct VOPTrue16Info {
   bool IsTrue16;
 };
 
-#define GET_FP8DstByteSelTable_DECL
-#define GET_FP8DstByteSelTable_IMPL
-#define GET_FP4DstByteSelTable_DECL
-#define GET_FP4DstByteSelTable_IMPL
+#define GET_FP4FP8DstByteSelTable_DECL
+#define GET_FP4FP8DstByteSelTable_IMPL
 
 struct DPMACCInstructionInfo {
   uint16_t Opcode;
   bool IsDPMACCInstruction;
 };
 
-struct FP8DstByteSelInfo {
+struct FP4FP8DstByteSelInfo {
   uint16_t Opcode;
   bool HasFP8DstByteSel;
-};
-
-struct FP4DstByteSelInfo {
-  uint16_t Opcode;
   bool HasFP4DstByteSel;
 };
 
@@ -665,15 +659,13 @@ bool isTrue16Inst(unsigned Opc) {
 }
 
 FPType getFPDstSelType(unsigned Opc) {
-  const FP8DstByteSelInfo *Info8 = getFP8DstByteSelHelper(Opc);
-  if (Info8 && Info8->HasFP8DstByteSel)
+  const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
+  if (Info && Info->HasFP8DstByteSel)
     return FPType::FP8;
-
-  const FP4DstByteSelInfo *Info4 = getFP4DstByteSelHelper(Opc);
-  if (Info4 && Info4->HasFP4DstByteSel)
+  else if (Info && Info->HasFP4DstByteSel)
     return FPType::FP4;
-
-  return FPType::None;
+  else
+    return FPType::None;
 }
 
 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {



More information about the llvm-commits mailing list