[llvm-branch-commits] [llvm] release/22.x: [AMDGPU] Fix inline constant encoding for `v_pk_fmac_f16` (#176659) (PR #177365)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 22 06:19:02 PST 2026
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: None (llvmbot)
Changes
Backport c253b9f
Requested by: @shiltian
---
Full diff: https://github.com/llvm/llvm-project/pull/177365.diff
17 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+8)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+18-1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+2)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+20)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+8)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+37)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+8)
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+6-1)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s (+3)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2.s (+3)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s (+4-1)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop2.s (+4-1)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt (+3)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt (+3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 7a91a40e18cde..fa4b790b88a79 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -666,6 +666,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
+ bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
+
bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
bool isVISrcB32() const {
@@ -2044,6 +2046,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_KIMM16:
return &APFloat::IEEEhalf();
@@ -2438,6 +2441,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
@@ -2480,6 +2484,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
@@ -3725,6 +3730,9 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
return AMDGPU::isInlinableLiteralV2F16(Val);
+ if (OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT)
+ return AMDGPU::isPKFMACF16InlineConstant(Val, isGFX11Plus());
+
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
return AMDGPU::isInlinableLiteralV2BF16(Val);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index dd3120f05ce26..cc03fb988ddbb 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -528,12 +528,26 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
break;
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ Imm = getInlineImmValF16(Imm);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
Imm = getInlineImmValF16(Imm);
break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT: {
+ // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
+ // halves, so we need to produce the duplicated value for correct
+ // round-trip.
+ if (isGFX11Plus()) {
+ int64_t F16Val = getInlineImmValF16(Imm);
+ Imm = (F16Val << 16) | (F16Val & 0xFFFF);
+ } else {
+ Imm = getInlineImmValF16(Imm);
+ }
+ break;
+ }
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
@@ -1597,6 +1611,9 @@ AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
case AMDGPU::OPERAND_REG_IMM_V2FP16:
UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
+ UseLit = AMDGPU::isPKFMACF16InlineConstant(Val, isGFX11Plus());
+ break;
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
break;
case AMDGPU::OPERAND_REG_IMM_INT16:
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index dad1ba7af9cf6..6d0c1bcbf1abe 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -374,6 +374,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return (Generation)Gen;
}
+ bool isGFX11Plus() const { return getGeneration() >= GFX11; }
+
unsigned getMaxWaveScratchSize() const {
// See COMPUTE_TMPRING_SIZE.WAVESIZE.
if (getGeneration() >= GFX12) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index b63d71dc2fde9..5a00cb8a4b6cb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -623,6 +623,25 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
return;
break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT: {
+ if (AMDGPU::isGFX11Plus(STI)) {
+ // For GFX11+, the inline constant is duplicated to both channels, so we
+ // need to check if the low and high 16 bits are the same, and then if
+ // they can be printed as inline constant values.
+ uint16_t Lo16 = static_cast<uint16_t>(Imm & 0xFFFF);
+ uint16_t Hi16 = static_cast<uint16_t>((Imm >> 16) & 0xFFFF);
+ if (Lo16 == Hi16 &&
+ printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
+ return;
+ } else {
+ // For pre-GFX11, the inline constant is in the low 16 bits, so we need
+ // to check if it can be printed as inline constant value.
+ if (isUInt<16>(Imm) &&
+ printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
+ return;
+ }
+ break;
+ }
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
if (isUInt<16>(Imm) &&
@@ -867,6 +886,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 49e94183202bd..5b731cdf6d05f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -346,6 +346,14 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding(
return AMDGPU::getInlineEncodingV2F16(static_cast<uint32_t>(Imm))
.value_or(255);
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
+ // V_PK_FMAC_F16 has different inline constant behavior on pre-GFX11 vs
+ // GFX11+: pre-GFX11 produces (f16, 0), GFX11+ duplicates f16 to both
+ // halves.
+ return AMDGPU::getPKFMACF16InlineEncoding(static_cast<uint32_t>(Imm),
+ AMDGPU::isGFX11Plus(STI))
+ .value_or(255);
+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
return AMDGPU::getInlineEncodingV2BF16(static_cast<uint32_t>(Imm))
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index b9ee9c7015061..a7721cdad08bf 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -207,6 +207,7 @@ enum OperandType : unsigned {
OPERAND_REG_IMM_FP16,
OPERAND_REG_IMM_V2BF16,
OPERAND_REG_IMM_V2FP16,
+ OPERAND_REG_IMM_V2FP16_SPLAT,
OPERAND_REG_IMM_V2INT16,
OPERAND_REG_IMM_NOINLINE_V2FP16,
OPERAND_REG_IMM_V2INT32,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index bd6c58d0f8945..513145f83994a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4666,6 +4666,8 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return AMDGPU::isInlinableLiteralV2F16(Imm);
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
+ return AMDGPU::isPKFMACF16InlineConstant(Imm, ST.isGFX11Plus());
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
return AMDGPU::isInlinableLiteralV2BF16(Imm);
@@ -5133,6 +5135,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 4763b5f57b8c8..3009440c75161 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1429,6 +1429,8 @@ def VSrc_v2f32 : SrcRegOrImm9 <VS_64_AlignTarget, "OPERAND_REG_IMM_V2FP32">;
def VSrc_NoInline_v2f16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_IMM_NOINLINE_V2FP16">;
+def VSrc_v2f16_splat : SrcRegOrImm9 <VS_32, "OPERAND_REG_IMM_V2FP16_SPLAT">;
+
//===----------------------------------------------------------------------===//
// VRegSrc_* Operands with a VGPR
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4ad3a5cd1d727..10cdae63d602f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2807,6 +2807,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
@@ -3168,6 +3169,34 @@ std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
return getInlineEncodingV216(true, Literal);
}
+// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
+// or nullopt. This accounts for different inline constant behavior:
+// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
+// - GFX11+: fp16 inline constants are duplicated into both halves
+std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
+ bool IsGFX11Plus) {
+ // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
+ if (!IsGFX11Plus)
+ return getInlineEncodingV216(/*IsFloat=*/true, Literal);
+
+ // GFX11+ behavior: f16 duplicated in both halves
+ // First, check for sign-extended integer inline constants (-16 to 64)
+ // These work the same across all generations
+ int32_t Signed = static_cast<int32_t>(Literal);
+ if (Signed >= 0 && Signed <= 64)
+ return 128 + Signed;
+
+ if (Signed >= -16 && Signed <= -1)
+ return 192 + std::abs(Signed);
+
+ // For float inline constants on GFX11+, both halves must be equal
+ uint16_t Lo = static_cast<uint16_t>(Literal);
+ uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
+ if (Lo != Hi)
+ return std::nullopt;
+ return getInlineEncodingV216(/*IsFloat=*/true, Lo);
+}
+
// Whether the given literal can be inlined for a V_PK_* instruction.
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
switch (OpType) {
@@ -3177,6 +3206,8 @@ bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return getInlineEncodingV216(true, Literal).has_value();
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
+ llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
return isInlinableLiteralV2BF16(Literal);
@@ -3202,6 +3233,11 @@ bool isInlinableLiteralV2F16(uint32_t Literal) {
return getInlineEncodingV2F16(Literal).has_value();
}
+// Whether the given literal can be inlined for V_PK_FMAC_F16 instruction.
+bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus) {
+ return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
+}
+
bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
if (IsFP64)
return !Lo_32(Val);
@@ -3223,6 +3259,7 @@ int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
case OPERAND_REG_IMM_INT32:
case OPERAND_REG_IMM_V2BF16:
case OPERAND_REG_IMM_V2FP16:
+ case OPERAND_REG_IMM_V2FP16_SPLAT:
case OPERAND_REG_IMM_V2FP32:
case OPERAND_REG_IMM_V2INT16:
case OPERAND_REG_IMM_V2INT32:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 770f9a86dc883..835ebfad9330d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1702,6 +1702,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
return 2;
@@ -1747,6 +1748,10 @@ std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
+LLVM_READNONE
+std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
+ bool IsGFX11Plus);
+
LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
@@ -1759,6 +1764,9 @@ bool isInlinableLiteralV2BF16(uint32_t Literal);
LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);
+LLVM_READNONE
+bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);
+
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 42e4fe7fe26af..799bdb8b18c9c 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1348,10 +1348,15 @@ let isCommutable = 1 in
def V_FMAAK_F64 : VOP2_Pseudo<"v_fmaak_f64", VOP_MADAK_F64, [], "">;
} // End SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1, FixedSize = 1, Size = 12, SchedRW = [Write64Bit]
+// A dedicated profile for V_PK_FMAC_F16.
+def VOP_V2F16_V2F16_V2F16_SPLAT : VOPProfile <[v2f16, v2f16, v2f16, untyped]> {
+ let Src0RC32 = VSrc_v2f16_splat;
+}
+
let SubtargetPredicate = HasPkFmacF16Inst in {
// FIXME: V_PK_FMAC_F16 is currently not used in instruction selection.
// If this changes, ensure the DPP variant is not used for GFX11+.
-defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
+defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16_SPLAT>;
} // End SubtargetPredicate = HasPkFmacF16Inst
// Note: 16-bit instructions produce a 0 result in the high 16-bits
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
index f05178dae37c9..bd670eb88c903 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
@@ -1916,6 +1916,9 @@ v_pk_fmac_f16 v5, -1, v2
// GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, 0.5, v2
+// GFX11: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
// GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, exec_hi, v2
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
index fbc6713245398..c60240362cc0f 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
@@ -2039,6 +2039,9 @@ v_pk_fmac_f16 v5, -1, v2
// GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, 0.5, v2
+// GFX11: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
// GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, src_scc, v2
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
index 6c9c4c60e9817..c535adea8b821 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
@@ -1922,7 +1922,10 @@ v_pk_fmac_f16 v5, -1, v2
// GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, 0.5, v2
-// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX12: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, exec_hi, v2
// GFX12: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
index e57d2c3e74d70..828430d2b2b95 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
@@ -2048,7 +2048,10 @@ v_pk_fmac_f16 v5, -1, v2
// GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, 0.5, v2
-// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX12: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
v_pk_fmac_f16 v5, src_scc, v2
// GFX12: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
index 9fc3f619529a2..c9ef581fbfb20 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
@@ -1843,6 +1843,9 @@
0xc1,0x04,0x0a,0x78
# GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
+0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00
+# GFX11: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
0xf0,0x04,0x0a,0x78
# GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
index 71ac49b8a469a..05e3291dea0a2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
@@ -1921,6 +1921,9 @@
0xc1,0x04,0x0a,0x78
# GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
+0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00
+# GFX12: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
0xf0,0x04,0x0a,0x78
# GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
``````````
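To make the behavioral change easier to follow, here is a minimal standalone sketch of the GFX11+ rule that the patch's new `getPKFMACF16InlineEncoding` implements (the helper `inlineEncodingF16`, its hand-written lookup table, and the driver in `main` are illustration-only assumptions, not LLVM API): integer inline constants keep their sign-extended meaning, while a floating-point inline constant is only usable when both f16 halves of the packed literal are identical, because GFX11+ hardware duplicates the constant into both lanes.

```cpp
// Illustration-only sketch; mirrors the GFX11+ branch of
// getPKFMACF16InlineEncoding from this patch. Not the LLVM API:
// inlineEncodingF16 and its table are a hand-written subset.
#include <cstdint>
#include <cstdio>
#include <optional>

// Subset of the f16 floating-point inline-constant src encodings.
static std::optional<unsigned> inlineEncodingF16(uint16_t V) {
  switch (V) {
  case 0x3800: return 240; // 0.5
  case 0xB800: return 241; // -0.5
  case 0x3C00: return 242; // 1.0
  case 0xBC00: return 243; // -1.0
  case 0x4000: return 244; // 2.0
  case 0xC000: return 245; // -2.0
  case 0x4400: return 246; // 4.0
  case 0xC400: return 247; // -4.0
  default:     return std::nullopt; // not an inline float constant
  }
}

// GFX11+ rule: integer inline constants are taken as sign-extended values;
// float inline constants are duplicated into both halves by the hardware,
// so they only apply when the two f16 halves of the literal are identical.
static std::optional<unsigned> pkFmacF16InlineGFX11Plus(uint32_t Literal) {
  int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;               // encodings 128..192
  if (Signed >= -16 && Signed <= -1)
    return 192 - Signed;               // encodings 193..208
  uint16_t Lo = static_cast<uint16_t>(Literal);
  uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
  if (Lo != Hi)
    return std::nullopt;               // halves differ: needs a 32-bit literal
  return inlineEncodingF16(Lo);
}

int main() {
  // (0.5, 0.5): both halves equal, folds to src encoding 240 (byte 0xf0),
  // matching the new "v_pk_fmac_f16 v5, 0x38003800, v2" check above.
  printf("%u\n", pkFmacF16InlineGFX11Plus(0x38003800u).value_or(255)); // 240
  // (0.0, 0.5): halves differ, so src encoding 255 (byte 0xff) plus a
  // trailing 32-bit literal, matching the updated "0.5" check above.
  printf("%u\n", pkFmacF16InlineGFX11Plus(0x00003800u).value_or(255)); // 255
  return 0;
}
```

The pre-GFX11 branch of the patch simply defers to the existing V2F16 handling (f16 value in the low half, zero in the high half), so it is not sketched here.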
https://github.com/llvm/llvm-project/pull/177365