[llvm] [AMDGPU] Fix inline constant encoding for `v_pk_fmac_f16` (PR #176659)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 18 09:21:38 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
This PR handles `v_pk_fmac_f16` inline constant encoding/decoding differences between pre-GFX11 and GFX11+ hardware.
- Pre-GFX11: fp16 inline constants produce (f16, 0) - value in low 16 bits, zero in high.
- GFX11+: fp16 inline constants are duplicated to both halves (f16, f16).
---
Full diff: https://github.com/llvm/llvm-project/pull/176659.diff
12 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+12-2)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+8)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+14)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+9-1)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+56)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+10)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s (+4-1)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2.s (+4-1)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s (+4-1)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop2.s (+4-1)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt (+4-1)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt (+4-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index dd3120f05ce26..74c6ce04b9ad6 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -528,12 +528,22 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
break;
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
Imm = getInlineImmValF16(Imm);
break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
+ // halves, so we need to produce the duplicated value for correct
+ // round-trip.
+ if (AMDGPU::isPKFMACF16(MI.getOpcode()) && isGFX11Plus()) {
+ int64_t F16Val = getInlineImmValF16(Imm);
+ Imm = (F16Val << 16) | (F16Val & 0xFFFF);
+ } else {
+ Imm = getInlineImmValF16(Imm);
+ }
+ break;
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 5777e77c25e55..37a8b5a354f07 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -350,6 +350,14 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding(
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ // V_PK_FMAC_F16 has different inline constant behavior on pre-GFX11 vs
+ // GFX11+: pre-GFX11 produces (f16, 0), GFX11+ duplicates f16 to both
+ // halves.
+ if (AMDGPU::isPKFMACF16(Desc.getOpcode())) {
+ return AMDGPU::getPKFMACF16InlineEncoding(static_cast<uint32_t>(Imm),
+ !AMDGPU::isGFX11Plus(STI))
+ .value_or(255);
+ }
return AMDGPU::getInlineEncodingV2F16(static_cast<uint32_t>(Imm))
.value_or(255);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6392022368785..56f4a907d5236 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4763,6 +4763,20 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
}
}
+bool SIInstrInfo::isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
+ int64_t ImmVal, uint8_t OpType) const {
+ // V_PK_FMAC_F16 has different inline constant behavior:
+ // - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
+ // - GFX11+: fp16 inline constants are duplicated into both halves
+ if ((OpType == AMDGPU::OPERAND_REG_IMM_V2FP16 ||
+ OpType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) &&
+ (MI.getOpcode() == AMDGPU::V_PK_FMAC_F16_e32 ||
+ MI.getOpcode() == AMDGPU::V_PK_FMAC_F16_e64))
+ return AMDGPU::isPKFMACF16InlineConstant(ImmVal, !AMDGPU::isGFX11Plus(ST));
+
+ return isInlineConstant(ImmVal, OpType);
+}
+
static bool compareMachineOp(const MachineOperand &Op0,
const MachineOperand &Op1) {
if (Op0.getType() != Op1.getType())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index bf51b22274f3d..90e39322ad507 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1278,7 +1278,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return isInlineConstant(ImmVal, OpType);
}
- return isInlineConstant(ImmVal, MI.getDesc().operands()[OpIdx].OperandType);
+ return isInlineConstant(MI, OpIdx, ImmVal,
+ MI.getDesc().operands()[OpIdx].OperandType);
}
bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
@@ -1290,6 +1291,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return isInlineConstant(*MO.getParent(), MO.getOperandNo());
}
+ /// Check if \p ImmVal can be used as an inline constant for operand \p OpIdx
+ /// in instruction \p MI with operand type \p OpType.
+ /// This handles V_PK_FMAC_F16 specially due to different inline constant
+ /// behavior on pre-GFX11 vs GFX11+.
+ bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, int64_t ImmVal,
+ uint8_t OpType) const;
+
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
const MachineOperand &MO) const;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6e689028fbd83..d8f4004bb6a02 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3168,6 +3168,34 @@ std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
return getInlineEncodingV216(true, Literal);
}
+// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
+// or nullopt. This accounts for different inline constant behavior:
+// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
+// - GFX11+: fp16 inline constants are duplicated into both halves
+std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
+ bool IsPreGFX11) {
+ // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
+ if (IsPreGFX11)
+ return getInlineEncodingV216(/*IsFloat=*/true, Literal);
+
+ // GFX11+ behavior: f16 duplicated in both halves
+ // First, check for sign-extended integer inline constants (-16 to 64)
+ // These work the same across all generations
+ int32_t Signed = static_cast<int32_t>(Literal);
+ if (Signed >= 0 && Signed <= 64)
+ return 128 + Signed;
+
+ if (Signed >= -16 && Signed <= -1)
+ return 192 + std::abs(Signed);
+
+ // For float inline constants on GFX11+, both halves must be equal
+ uint16_t Lo = static_cast<uint16_t>(Literal);
+ uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
+ if (Lo != Hi)
+ return std::nullopt;
+ return getInlineEncodingV216(/*IsFloat=*/true, Lo);
+}
+
// Whether the given literal can be inlined for a V_PK_* instruction.
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
switch (OpType) {
@@ -3202,6 +3230,11 @@ bool isInlinableLiteralV2F16(uint32_t Literal) {
return getInlineEncodingV2F16(Literal).has_value();
}
+// Whether the given literal can be inlined for V_PK_FMAC_F16 instruction.
+bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsPreGFX11) {
+ return getPKFMACF16InlineEncoding(Literal, IsPreGFX11).has_value();
+}
+
bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
if (IsFP64)
return !Lo_32(Val);
@@ -3662,6 +3695,29 @@ bool isPackedFP32Inst(unsigned Opc) {
}
}
+bool isPKFMACF16(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::V_PK_FMAC_F16_dpp:
+ case AMDGPU::V_PK_FMAC_F16_e32:
+ case AMDGPU::V_PK_FMAC_F16_e64:
+ case AMDGPU::V_PK_FMAC_F16_e64_dpp:
+ case AMDGPU::V_PK_FMAC_F16_sdwa:
+ case AMDGPU::V_PK_FMAC_F16_dpp8_gfx10:
+ case AMDGPU::V_PK_FMAC_F16_dpp_gfx10:
+ case AMDGPU::V_PK_FMAC_F16_dpp_gfx9:
+ case AMDGPU::V_PK_FMAC_F16_e32_gfx10:
+ case AMDGPU::V_PK_FMAC_F16_e32_gfx11:
+ case AMDGPU::V_PK_FMAC_F16_e32_gfx12:
+ case AMDGPU::V_PK_FMAC_F16_e32_gfx9:
+ case AMDGPU::V_PK_FMAC_F16_e32_vi:
+ case AMDGPU::V_PK_FMAC_F16_e64_gfx9:
+ case AMDGPU::V_PK_FMAC_F16_sdwa_gfx9:
+ return true;
+ default:
+ return false;
+ }
+}
+
const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
assert(isFixedDims() && "expect kind to be FixedDims");
return Dims;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 9d9a7e3d862de..d2183d9cc3665 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1746,6 +1746,10 @@ std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
+LLVM_READNONE
+std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
+ bool IsPreGFX11);
+
LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
@@ -1758,6 +1762,9 @@ bool isInlinableLiteralV2BF16(uint32_t Literal);
LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);
+LLVM_READNONE
+bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsPreGFX11);
+
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
@@ -1770,6 +1777,9 @@ bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);
+/// Returns true if \p Opc is a V_PK_FMAC_F16 instruction variant.
+LLVM_READONLY bool isPKFMACF16(unsigned Opc);
+
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset);
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
index f05178dae37c9..83eeadb449aa6 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
@@ -1916,7 +1916,10 @@ v_pk_fmac_f16 v5, -1, v2
// GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, 0.5, v2
-// GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX11: v_pk_fmac_f16 v5, 0x38003800, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, exec_hi, v2
// GFX11: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
index fbc6713245398..4beca887a3e55 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
@@ -2039,7 +2039,10 @@ v_pk_fmac_f16 v5, -1, v2
// GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, 0.5, v2
-// GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX11: v_pk_fmac_f16 v5, 0x38003800, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, src_scc, v2
// GFX11: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
index 6c9c4c60e9817..c535adea8b821 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
@@ -1922,7 +1922,10 @@ v_pk_fmac_f16 v5, -1, v2
// GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, 0.5, v2
-// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX12: v_pk_fmac_f16 v5, 0x38003800, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, exec_hi, v2
// GFX12: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
index e57d2c3e74d70..828430d2b2b95 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
@@ -2048,7 +2048,10 @@ v_pk_fmac_f16 v5, -1, v2
// GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, 0.5, v2
-// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX12: v_pk_fmac_f16 v5, 0x38003800, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, src_scc, v2
// GFX12: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
index 9fc3f619529a2..84f2f895e0c80 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
@@ -1843,8 +1843,11 @@
0xc1,0x04,0x0a,0x78
# GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
+0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00
+# GFX11: v_pk_fmac_f16 v5, lit(0x3800), v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
0xf0,0x04,0x0a,0x78
-# GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+# GFX11: v_pk_fmac_f16 v5, 0x38003800, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
0xfd,0x04,0x0a,0x78
# GFX11: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
index 71ac49b8a469a..eadd522456e5b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
@@ -1921,8 +1921,11 @@
0xc1,0x04,0x0a,0x78
# GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
+0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00
+# GFX12: v_pk_fmac_f16 v5, lit(0x3800), v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
0xf0,0x04,0x0a,0x78
-# GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+# GFX12: v_pk_fmac_f16 v5, 0x38003800, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
0xfd,0x04,0x0a,0x78
# GFX12: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78]
``````````
</details>
https://github.com/llvm/llvm-project/pull/176659
More information about the llvm-commits
mailing list