[llvm] [AMDGPU] Fix inline constant encoding for `v_pk_fmac_f16` (PR #176659)

Sun Jan 18 09:21:38 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

<details>
<summary>Changes</summary>

This PR handles `v_pk_fmac_f16` inline constant encoding/decoding differences between pre-GFX11 and GFX11+ hardware.

- Pre-GFX11: fp16 inline constants produce (f16, 0) - value in low 16 bits, zero in high.
- GFX11+: fp16 inline constants are duplicated to both halves (f16, f16).

---
Full diff: https://github.com/llvm/llvm-project/pull/176659.diff


12 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+12-2) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+8) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+14) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+9-1) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+56) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+10) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s (+4-1) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2.s (+4-1) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s (+4-1) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop2.s (+4-1) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt (+4-1) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt (+4-1) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index dd3120f05ce26..74c6ce04b9ad6 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -528,12 +528,22 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
         break;
       case AMDGPU::OPERAND_REG_IMM_FP16:
       case AMDGPU::OPERAND_REG_IMM_INT16:
-      case AMDGPU::OPERAND_REG_IMM_V2FP16:
       case AMDGPU::OPERAND_REG_INLINE_C_FP16:
       case AMDGPU::OPERAND_REG_INLINE_C_INT16:
-      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
         Imm = getInlineImmValF16(Imm);
         break;
+      case AMDGPU::OPERAND_REG_IMM_V2FP16:
+      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+        // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
+        // halves, so we need to produce the duplicated value for correct
+        // round-trip.
+        if (AMDGPU::isPKFMACF16(MI.getOpcode()) && isGFX11Plus()) {
+          int64_t F16Val = getInlineImmValF16(Imm);
+          Imm = (F16Val << 16) | (F16Val & 0xFFFF);
+        } else {
+          Imm = getInlineImmValF16(Imm);
+        }
+        break;
       case AMDGPU::OPERAND_REG_IMM_FP64:
       case AMDGPU::OPERAND_REG_IMM_INT64:
       case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 5777e77c25e55..37a8b5a354f07 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -350,6 +350,14 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding(
 
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+    // V_PK_FMAC_F16 has different inline constant behavior on pre-GFX11 vs
+    // GFX11+: pre-GFX11 produces (f16, 0), GFX11+ duplicates f16 to both
+    // halves.
+    if (AMDGPU::isPKFMACF16(Desc.getOpcode())) {
+      return AMDGPU::getPKFMACF16InlineEncoding(static_cast<uint32_t>(Imm),
+                                                !AMDGPU::isGFX11Plus(STI))
+          .value_or(255);
+    }
     return AMDGPU::getInlineEncodingV2F16(static_cast<uint32_t>(Imm))
         .value_or(255);
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6392022368785..56f4a907d5236 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4763,6 +4763,20 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
   }
 }
 
+bool SIInstrInfo::isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
+                                   int64_t ImmVal, uint8_t OpType) const {
+  // V_PK_FMAC_F16 reads fp16 inline constants differently per generation:
+  // pre-GFX11 yields (f16, 0); GFX11+ duplicates the f16 into both halves.
+  // Use the shared opcode predicate so every variant (not only the e32/e64
+  // pseudos) agrees with the MC code emitter and the disassembler.
+  const bool IsPackedF16Operand = OpType == AMDGPU::OPERAND_REG_IMM_V2FP16 ||
+                                  OpType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16;
+  if (IsPackedF16Operand && AMDGPU::isPKFMACF16(MI.getOpcode()))
+    return AMDGPU::isPKFMACF16InlineConstant(ImmVal, !AMDGPU::isGFX11Plus(ST));
+
+  return isInlineConstant(ImmVal, OpType);
+}
+
 static bool compareMachineOp(const MachineOperand &Op0,
                              const MachineOperand &Op1) {
   if (Op0.getType() != Op1.getType())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index bf51b22274f3d..90e39322ad507 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1278,7 +1278,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
       return isInlineConstant(ImmVal, OpType);
     }
 
-    return isInlineConstant(ImmVal, MI.getDesc().operands()[OpIdx].OperandType);
+    return isInlineConstant(MI, OpIdx, ImmVal,
+                            MI.getDesc().operands()[OpIdx].OperandType);
   }
 
   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
@@ -1290,6 +1291,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return isInlineConstant(*MO.getParent(), MO.getOperandNo());
   }
 
+  /// Check if \p ImmVal can be used as an inline constant for operand \p OpIdx
+  /// in instruction \p MI with operand type \p OpType.
+  /// This handles V_PK_FMAC_F16 specially due to different inline constant
+  /// behavior on pre-GFX11 vs GFX11+.
+  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, int64_t ImmVal,
+                        uint8_t OpType) const;
+
   bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
                          const MachineOperand &MO) const;
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6e689028fbd83..d8f4004bb6a02 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3168,6 +3168,34 @@ std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
   return getInlineEncodingV216(true, Literal);
 }
 
+// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
+// or nullopt. This accounts for different inline constant behavior:
+// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
+// - GFX11+: fp16 inline constants are duplicated into both halves
+std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
+                                                   bool IsPreGFX11) {
+  // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
+  if (IsPreGFX11)
+    return getInlineEncodingV216(/*IsFloat=*/true, Literal);
+
+  // GFX11+: sign-extended integer inline constants (-16 to 64) come first;
+  // these work the same across all generations.
+  const int32_t Signed = static_cast<int32_t>(Literal);
+  if (Signed >= 0 && Signed <= 64)
+    return 128 + Signed;
+  if (Signed >= -16 && Signed <= -1)
+    return 192 + std::abs(Signed);
+
+  // GFX11+ float constants: both halves must hold the same f16. A half in
+  // [1, 64] is rejected because the helper would match it as an integer
+  // constant, e.g. 0x00010001 would be emitted as integer 1 (0x00000001).
+  const uint16_t Lo = static_cast<uint16_t>(Literal);
+  const uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
+  if (Lo != Hi || Lo <= 64)
+    return std::nullopt;
+  return getInlineEncodingV216(/*IsFloat=*/true, Lo);
+}
+
 // Whether the given literal can be inlined for a V_PK_* instruction.
 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
   switch (OpType) {
@@ -3202,6 +3230,11 @@ bool isInlinableLiteralV2F16(uint32_t Literal) {
   return getInlineEncodingV2F16(Literal).has_value();
 }
 
+// True iff V_PK_FMAC_F16 can encode \p Literal as an inline constant.
+bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsPreGFX11) {
+  return static_cast<bool>(getPKFMACF16InlineEncoding(Literal, IsPreGFX11));
+}
+
 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
   if (IsFP64)
     return !Lo_32(Val);
@@ -3662,6 +3695,29 @@ bool isPackedFP32Inst(unsigned Opc) {
   }
 }
 
+bool isPKFMACF16(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return false;
+  case AMDGPU::V_PK_FMAC_F16_e32:
+  case AMDGPU::V_PK_FMAC_F16_e64:
+  case AMDGPU::V_PK_FMAC_F16_dpp:
+  case AMDGPU::V_PK_FMAC_F16_e64_dpp:
+  case AMDGPU::V_PK_FMAC_F16_sdwa:
+  case AMDGPU::V_PK_FMAC_F16_e32_vi:
+  case AMDGPU::V_PK_FMAC_F16_e32_gfx9:
+  case AMDGPU::V_PK_FMAC_F16_e64_gfx9:
+  case AMDGPU::V_PK_FMAC_F16_dpp_gfx9:
+  case AMDGPU::V_PK_FMAC_F16_sdwa_gfx9:
+  case AMDGPU::V_PK_FMAC_F16_e32_gfx10:
+  case AMDGPU::V_PK_FMAC_F16_dpp_gfx10:
+  case AMDGPU::V_PK_FMAC_F16_dpp8_gfx10:
+  case AMDGPU::V_PK_FMAC_F16_e32_gfx11:
+  case AMDGPU::V_PK_FMAC_F16_e32_gfx12:
+    return true;
+  }
+}
+
 const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
   assert(isFixedDims() && "expect kind to be FixedDims");
   return Dims;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 9d9a7e3d862de..d2183d9cc3665 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1746,6 +1746,10 @@ std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
 LLVM_READNONE
 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
 
+LLVM_READNONE
+std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
+                                                   bool IsPreGFX11);
+
 LLVM_READNONE
 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
 
@@ -1758,6 +1762,9 @@ bool isInlinableLiteralV2BF16(uint32_t Literal);
 LLVM_READNONE
 bool isInlinableLiteralV2F16(uint32_t Literal);
 
+LLVM_READNONE
+bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsPreGFX11);
+
 LLVM_READNONE
 bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
 
@@ -1770,6 +1777,9 @@ bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
 
 LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);
 
+/// Returns true if \p Opc is a V_PK_FMAC_F16 instruction variant.
+LLVM_READONLY bool isPKFMACF16(unsigned Opc);
+
 LLVM_READONLY
 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                       int64_t EncodedOffset);
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
index f05178dae37c9..83eeadb449aa6 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
@@ -1916,7 +1916,10 @@ v_pk_fmac_f16 v5, -1, v2
 // GFX11: v_pk_fmac_f16 v5, -1, v2                ; encoding: [0xc1,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, 0.5, v2
-// GFX11: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX11: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX11: v_pk_fmac_f16 v5, 0x38003800, v2        ; encoding: [0xf0,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, exec_hi, v2
 // GFX11: v_pk_fmac_f16 v5, exec_hi, v2           ; encoding: [0x7f,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
index fbc6713245398..4beca887a3e55 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
@@ -2039,7 +2039,10 @@ v_pk_fmac_f16 v5, -1, v2
 // GFX11: v_pk_fmac_f16 v5, -1, v2                ; encoding: [0xc1,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, 0.5, v2
-// GFX11: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX11: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX11: v_pk_fmac_f16 v5, 0x38003800, v2        ; encoding: [0xf0,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, src_scc, v2
 // GFX11: v_pk_fmac_f16 v5, src_scc, v2           ; encoding: [0xfd,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
index 6c9c4c60e9817..c535adea8b821 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
@@ -1922,7 +1922,10 @@ v_pk_fmac_f16 v5, -1, v2
 // GFX12: v_pk_fmac_f16 v5, -1, v2                ; encoding: [0xc1,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, 0.5, v2
-// GFX12: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX12: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX12: v_pk_fmac_f16 v5, 0x38003800, v2        ; encoding: [0xf0,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, exec_hi, v2
 // GFX12: v_pk_fmac_f16 v5, exec_hi, v2           ; encoding: [0x7f,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
index e57d2c3e74d70..828430d2b2b95 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
@@ -2048,7 +2048,10 @@ v_pk_fmac_f16 v5, -1, v2
 // GFX12: v_pk_fmac_f16 v5, -1, v2                ; encoding: [0xc1,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, 0.5, v2
-// GFX12: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX12: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX12: v_pk_fmac_f16 v5, 0x38003800, v2        ; encoding: [0xf0,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, src_scc, v2
 // GFX12: v_pk_fmac_f16 v5, src_scc, v2           ; encoding: [0xfd,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
index 9fc3f619529a2..84f2f895e0c80 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
@@ -1843,8 +1843,11 @@
 0xc1,0x04,0x0a,0x78
 # GFX11: v_pk_fmac_f16 v5, -1, v2                ; encoding: [0xc1,0x04,0x0a,0x78]
 
+0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00
+# GFX11: v_pk_fmac_f16 v5, lit(0x3800), v2       ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
 0xf0,0x04,0x0a,0x78
-# GFX11: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xf0,0x04,0x0a,0x78]
+# GFX11: v_pk_fmac_f16 v5, 0x38003800, v2        ; encoding: [0xf0,0x04,0x0a,0x78]
 
 0xfd,0x04,0x0a,0x78
 # GFX11: v_pk_fmac_f16 v5, src_scc, v2           ; encoding: [0xfd,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
index 71ac49b8a469a..eadd522456e5b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
@@ -1921,8 +1921,11 @@
 0xc1,0x04,0x0a,0x78
 # GFX12: v_pk_fmac_f16 v5, -1, v2                ; encoding: [0xc1,0x04,0x0a,0x78]
 
+0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00
+# GFX12: v_pk_fmac_f16 v5, lit(0x3800), v2       ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
 0xf0,0x04,0x0a,0x78
-# GFX12: v_pk_fmac_f16 v5, 0.5, v2               ; encoding: [0xf0,0x04,0x0a,0x78]
+# GFX12: v_pk_fmac_f16 v5, 0x38003800, v2        ; encoding: [0xf0,0x04,0x0a,0x78]
 
 0xfd,0x04,0x0a,0x78
 # GFX12: v_pk_fmac_f16 v5, src_scc, v2           ; encoding: [0xfd,0x04,0x0a,0x78]

``````````

</details>


https://github.com/llvm/llvm-project/pull/176659