[llvm] [AMDGPU] Replace `isInlinableLiteral16` with specific version (PR #84402)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 7 15:49:36 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
The current implementation of `isInlinableLiteral16` assumes that a 16-bit
inlinable literal is either an `i16` or an `fp16`. This is not always true,
because the literal can also be a `bf16`, and `fp16` and `bf16` cannot be told
apart by just looking at the value. This patch splits `isInlinableLiteral16`
into three versions, for `i16`, `fp16`, and `bf16` respectively, and calls the
corresponding version at each use site.
---
Patch is 322.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84402.diff
46 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+74-21)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+15-14)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h (+2-2)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+5-6)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+22-6)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+22-3)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+1-3)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+5-3)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+7-1)
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+1)
- (modified) llvm/test/CodeGen/AMDGPU/immv216.ll (+21-21)
- (modified) llvm/test/CodeGen/AMDGPU/inline-constraints.ll (-12)
- (modified) llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (+96-48)
- (modified) llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll (+1-1)
- (modified) llvm/test/MC/AMDGPU/gfx10_asm_vop1.s (+8-8)
- (modified) llvm/test/MC/AMDGPU/gfx10_asm_vop3.s (+96-96)
- (modified) llvm/test/MC/AMDGPU/gfx10_asm_vopc.s (+48-48)
- (modified) llvm/test/MC/AMDGPU/gfx10_asm_vopc_e64.s (+96-96)
- (modified) llvm/test/MC/AMDGPU/gfx10_asm_vopcx.s (+72-72)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1.s (+36-36)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3.s (+54-54)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s (+5-5)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s (+48-48)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s (+66-66)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc.s (+24-24)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s (+13-13)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1.s (+5-5)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+54-54)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s (+5-5)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s (+48-48)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s (+24-24)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc.s (+24-24)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s (+12-12)
- (modified) llvm/test/MC/AMDGPU/gfx8_asm_vop1.s (+4-4)
- (modified) llvm/test/MC/AMDGPU/gfx8_asm_vop2.s (+22-22)
- (modified) llvm/test/MC/AMDGPU/gfx8_asm_vopc.s (+66-64)
- (modified) llvm/test/MC/AMDGPU/gfx9-asm-err.s (+10-10)
- (modified) llvm/test/MC/AMDGPU/gfx9_asm_vop1.s (+4-4)
- (modified) llvm/test/MC/AMDGPU/gfx9_asm_vop2.s (+24-22)
- (modified) llvm/test/MC/AMDGPU/gfx9_asm_vopc.s (+64-64)
- (modified) llvm/test/MC/AMDGPU/gfx9_err_pos.s (-10)
- (modified) llvm/test/MC/AMDGPU/vop3-gfx10.s (+2-2)
- (modified) llvm/test/MC/AMDGPU/vop3-gfx9.s (+8-8)
- (modified) llvm/test/MC/AMDGPU/vop3.s (+2-2)
- (modified) llvm/test/MC/AMDGPU/vop_sdwa.s (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d5efd441556252..07c6cf7e13f894 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1926,6 +1926,11 @@ static const fltSemantics *getFltSemantics(MVT VT) {
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
switch (OperandType) {
+ // When floating-point immediate is used as operand of type i16, the 32-bit
+ // representation of the constant truncated to the 16 LSBs should be used.
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
@@ -1949,13 +1954,10 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
return &APFloat::IEEEdouble();
- case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
- case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
@@ -2001,13 +2003,15 @@ static bool isSafeTruncation(int64_t Val, unsigned Size) {
}
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
- if (VT.getScalarType() == MVT::i16) {
- // FP immediate values are broken.
- return isInlinableIntLiteral(Val);
- }
+ if (VT.getScalarType() == MVT::i16)
+ return isInlinableLiteral32(Val, HasInv2Pi);
+
+ if (VT.getScalarType() == MVT::f16)
+ return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
- // f16/v2f16 operands work correctly for all values.
- return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
+ assert(VT.getScalarType() == MVT::bf16);
+
+ return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
}
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2041,9 +2045,30 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
return false;
if (type.getScalarSizeInBits() == 16) {
- return isInlineableLiteralOp16(
- static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
- type, AsmParser->hasInv2PiInlineImm());
+ bool Lost = false;
+ switch (type.getScalarType().SimpleTy) {
+ default:
+ llvm_unreachable("unknown 16-bit type");
+ case MVT::bf16:
+ FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
+ &Lost);
+ break;
+ case MVT::f16:
+ FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
+ &Lost);
+ break;
+ case MVT::i16:
+ FPLiteral.convert(APFloatBase::IEEEsingle(),
+ APFloat::rmNearestTiesToEven, &Lost);
+ break;
+ }
+ // We need to use 32-bit representation here because when a floating-point
+ // inline constant is used as an i16 operand, its 32-bit representation
+ // will be used. We will need the 32-bit value to check if it is FP inline
+ // constant.
+ uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+ return isInlineableLiteralOp16(ImmVal, type,
+ AsmParser->hasInv2PiInlineImm());
}
// Check if single precision literal is inlinable
@@ -2375,15 +2400,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
- case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ if (isSafeTruncation(Val, 16) &&
+ AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
+ Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
+ setImmKindConst();
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+ setImmKindLiteral();
+ return;
+
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
if (isSafeTruncation(Val, 16) &&
- AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
- AsmParser->hasInv2PiInlineImm())) {
+ AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
+ AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
setImmKindConst();
return;
@@ -2410,12 +2446,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
+ assert(isSafeTruncation(Val, 16));
+ assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
assert(isSafeTruncation(Val, 16));
- assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
- AsmParser->hasInv2PiInlineImm()));
+ assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
+ AsmParser->hasInv2PiInlineImm()));
Inst.addOperand(MCOperand::createImm(Val));
return;
@@ -3559,7 +3600,19 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
return AMDGPU::isInlinableLiteralV2BF16(Val);
- return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
+ if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
+ return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
+
+ if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
+ return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
+
+ llvm_unreachable("invalid operand type");
}
default:
llvm_unreachable("invalid operand size");
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index a32be1e50a6053..b6a95906bc45c6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -451,19 +451,20 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- int16_t SImm = static_cast<int16_t>(Imm);
+ int32_t SImm = static_cast<int32_t>(Imm);
if (isInlinableIntLiteral(SImm)) {
O << SImm;
- } else {
- uint64_t Imm16 = static_cast<uint16_t>(Imm);
- O << formatHex(Imm16);
+ return;
}
+
+ if (printImmediateFloat32(Imm, STI, O))
+ return;
+
+ O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
}
-// This must accept a 32-bit immediate value to correctly handle packed 16-bit
-// operations.
-static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
- raw_ostream &O) {
+static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (Imm == 0x3C00)
O << "1.0";
else if (Imm == 0xBC00)
@@ -529,9 +530,9 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
O << formatHex(static_cast<uint64_t>(Imm));
}
-void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
if (isInlinableIntLiteral(SImm)) {
O << SImm;
@@ -539,7 +540,7 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
}
uint16_t HImm = static_cast<uint16_t>(Imm);
- if (printImmediateFloat16(HImm, STI, O))
+ if (printImmediateFP16(HImm, STI, O))
return;
uint64_t Imm16 = static_cast<uint16_t>(Imm);
@@ -566,7 +567,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
if (isUInt<16>(Imm) &&
- printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
+ printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
return;
break;
case AMDGPU::OPERAND_REG_IMM_V2BF16:
@@ -845,7 +846,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
- printImmediate16(Op.getImm(), STI, O);
+ printImmediateF16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 15ecbf2e5e5918..c801eaf1111e2f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O);
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
- raw_ostream &O);
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateF16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediateV216(uint32_t Imm, uint8_t OpType,
const MCSubtargetInfo &STI, raw_ostream &O);
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index e51bb40132f96e..fb93f45e3e87a3 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -116,11 +116,6 @@ static uint32_t getIntInlineImmEncoding(IntTy Imm) {
return 0;
}
-static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
- uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
- return IntImm == 0 ? 255 : IntImm;
-}
-
static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
if (IntImm != 0)
@@ -214,6 +209,10 @@ static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
return 255;
}
+static uint32_t getLit16IntEncoding(uint32_t Val, const MCSubtargetInfo &STI) {
+ return getLit32Encoding(Val, STI);
+}
+
static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
if (IntImm != 0)
@@ -296,7 +295,7 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
- return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ return getLit16IntEncoding(static_cast<uint32_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e105db313548dd..1889ab00728800 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15495,16 +15495,32 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
llvm_unreachable("Invalid asm constraint");
}
-bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
- uint64_t Val,
+bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
unsigned MaxSize) const {
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
- if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
- (Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
- (Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
- return true;
+ if (Size == 16) {
+ MVT VT = Op.getSimpleValueType();
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::i16:
+ return AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
+ case MVT::f16:
+ return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
+ case MVT::bf16:
+ return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
+ case MVT::v2i16:
+ return AMDGPU::getInlineEncodingV2I16(Val).has_value();
+ case MVT::v2f16:
+ return AMDGPU::getInlineEncodingV2F16(Val).has_value();
+ case MVT::v2bf16:
+ return AMDGPU::getInlineEncodingV2BF16(Val).has_value();
+ }
}
+ if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
+ (Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
+ return true;
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ca2c2d87009f2f..e8022c8b0afa19 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4121,13 +4121,32 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
ST.hasInv2PiInlineImm());
case 16:
return ST.has16BitInsts() &&
- AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
- ST.hasInv2PiInlineImm());
+ AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
+ ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
}
}
+bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
+ APInt IntImm = Imm.bitcastToAPInt();
+ int64_t IntImmVal = IntImm.getSExtValue();
+ bool HasInv2Pi = ST.hasInv2PiInlineImm();
+ switch (APFloat::SemanticsToEnum(Imm.getSemantics())) {
+ default:
+ llvm_unreachable("invalid fltSemantics");
+ case APFloatBase::S_IEEEsingle:
+ case APFloatBase::S_IEEEdouble:
+ return isInlineConstant(IntImm);
+ case APFloatBase::S_BFloat:
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteralBF16(IntImmVal, HasInv2Pi);
+ case APFloatBase::S_IEEEhalf:
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteralFP16(IntImmVal, HasInv2Pi);
+ }
+}
+
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint8_t OperandType) const {
assert(!MO.isReg() && "isInlineConstant called on register operand!");
@@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
// constants in these cases
int16_t Trunc = static_cast<int16_t>(Imm);
return ST.has16BitInsts() &&
- AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
+ AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
}
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 4200e0c8a29e1c..a62bf779fe2e2d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -984,9 +984,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool isInlineConstant(const APInt &Imm) const;
- bool isInlineConstant(const APFloat &Imm) const {
- return isInlineConstant(Imm.bitcastToAPInt());
- }
+ bool isInlineConstant(const APFloat &Imm) const;
// Returns true if this non-register operand definitely does not need to be
// encoded as a 32-bit literal. Note that this function handles all kinds of
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 62903a244dc892..edb0e50da2896b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2657,13 +2657,15 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3E22; // 1.0 / (2.0 * pi)
}
-bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
+bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
+ return isInlinableLiteral32(Literal, HasInv2Pi);
+}
+
+bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;
-
if (isInlinableIntLiteral(Literal))
return true;
-
uint16_t Val = static_cast<uint16_t>(Literal);
return Val == 0x3C00 || // 1.0
Val == 0xBC00 || // -1.0
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index bb307cb67c9b79..d7ea2a3eff4b72 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1404,7 +1404,13 @@ LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
-bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index e1131bbb78d3fc..7f0b045b4419a4 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1007,6 +1007,7 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
!cast<RegisterOperand>("VISrc_"#ArgTy[3].Size#
!cond(IsC_F32: "_f32",
IsC_F16: "_f16",
+ IsC_BF16: "_bf16",
1: "_b32")));
// For f16 and bf16 matrices A and B, each element can be modified by
diff --git a/llvm/test/CodeGen/AMDGPU/immv216.ll b/llvm/test/CodeGen/AMDGPU/immv216.ll
index b66ca71a327495..ae51c3edf1c7e7 100644
--- a/llvm/test/CodeGen/AMDGPU/immv216.ll
+++ b/llvm/test/CodeGen/AMDGPU/immv216.ll
@@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
}
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
-; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
+; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
-; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/84402
More information about the llvm-commits
mailing list