[llvm] [AMDGPU] Fix wrong operand value when floating-point value is used as operand of type i16 (PR #84106)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 5 18:56:46 PST 2024
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/84106
According to the "OPF_INV2PI_16" section of the spec, when a floating-point
value is used as an operand of type `i16`, the 32-bit representation of the
constant, truncated to its 16 LSBs, should be used. Currently we use the FP16
representation directly, which does not conform to the spec.
For example, when `0.5` is used, we currently encode it as `0x3800`, because
that is the encoding of `<half 0.5>`. Instead, it should be `0x3f000000` (the
FP32 encoding of `0.5`) truncated to the 16 LSBs, which is `0x0000`.
>From a2eb5177cf034134c1969721862729f596ca66e6 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 5 Mar 2024 21:50:56 -0500
Subject: [PATCH] [AMDGPU] Fix wrong operand value when floating-point value is
used as operand of type i16
According to the "OPF_INV2PI_16" section of the spec, when a floating-point
value is used as an operand of type `i16`, the 32-bit representation of the
constant, truncated to its 16 LSBs, should be used. Currently we use the FP16
representation directly, which does not conform to the spec.
For example, when `0.5` is used, we currently encode it as `0x3800`, because
that is the encoding of `<half 0.5>`. Instead, it should be `0x3f000000` (the
FP32 encoding of `0.5`) truncated to the 16 LSBs, which is `0x0000`.
---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 33 ++++++++++++++++---
1 file changed, 28 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 10999d846e3bb2..e2203acf1e9795 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1926,6 +1926,11 @@ static const fltSemantics *getFltSemantics(MVT VT) {
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
switch (OperandType) {
+ // When floating-point immediate is used as operand of type i16, the 32-bit
+ // representation of the constant truncated to the 16 LSBs should be used.
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
@@ -1949,13 +1954,10 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
return &APFloat::IEEEdouble();
- case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
- case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
@@ -2045,9 +2047,26 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
return false;
if (type.getScalarSizeInBits() == 16) {
+ bool Lost;
+ switch (type.getScalarType().SimpleTy) {
+ default:
+ llvm_unreachable("unknown 16-bit type");
+ case MVT::bf16:
+ FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
+ &Lost);
+ break;
+ case MVT::f16:
+ FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
+ &Lost);
+ break;
+ case MVT::i16:
+ FPLiteral.convert(APFloatBase::IEEEsingle(),
+ APFloat::rmNearestTiesToEven, &Lost);
+ break;
+ }
return isInlineableLiteralOp16(
- static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
- type, AsmParser->hasInv2PiInlineImm());
+ static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), type,
+ AsmParser->hasInv2PiInlineImm());
}
// Check if single precision literal is inlinable
@@ -2315,6 +2334,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
// checked earlier in isLiteralImm()
uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+ if (OpTy == AMDGPU::OPERAND_REG_IMM_INT16 || OpTy ==
+ AMDGPU::OPERAND_REG_INLINE_C_INT16 || OpTy ==
+ AMDGPU::OPERAND_REG_INLINE_AC_INT16)
+ ImmVal = ImmVal & 0xffff;
Inst.addOperand(MCOperand::createImm(ImmVal));
if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
setImmKindMandatoryLiteral();
More information about the llvm-commits
mailing list