[llvm] [AMDGPU] Fix wrong operand value when floating-point value is used as operand of type i16 (PR #84106)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 5 20:16:48 PST 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/84106
>From 267f7c0914b107265a72ea2ac4ab8c9c45f4d8b2 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 5 Mar 2024 23:16:30 -0500
Subject: [PATCH] [AMDGPU] Fix wrong operand value when floating-point value is
used as operand of type i16
According to the "OPF_INV2PI_16" section of the spec, when a floating-point
value is used as an operand of type `i16`, the 32-bit representation of the
constant, truncated to its 16 LSBs, should be used. Currently we use the FP16
representation directly, which does not conform to the spec.
For example, when `0.5` is used, we currently encode it as `0x3800` because that
is the encoding of `<half 0.5>`. Instead, it should be `0x3f000000` (the FP32
encoding of `0.5`) truncated to its 16 LSBs, which is `0x0000`.
---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 33 ++++++++++++++++---
llvm/test/MC/AMDGPU/gfx10_asm_vop1.s | 16 ++++-----
2 files changed, 36 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 10999d846e3bb2..5050aec2613815 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1926,6 +1926,11 @@ static const fltSemantics *getFltSemantics(MVT VT) {
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
switch (OperandType) {
+ // When a floating-point immediate is used as an operand of type i16, the
+ // 32-bit representation of the constant truncated to the 16 LSBs is used.
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
@@ -1949,13 +1954,10 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
return &APFloat::IEEEdouble();
- case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
- case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
@@ -2045,9 +2047,26 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
return false;
if (type.getScalarSizeInBits() == 16) {
+ bool Lost;
+ switch (type.getScalarType().SimpleTy) {
+ default:
+ llvm_unreachable("unknown 16-bit type");
+ case MVT::bf16:
+ FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
+ &Lost);
+ break;
+ case MVT::f16:
+ FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
+ &Lost);
+ break;
+ case MVT::i16:
+ FPLiteral.convert(APFloatBase::IEEEsingle(),
+ APFloat::rmNearestTiesToEven, &Lost);
+ break;
+ }
return isInlineableLiteralOp16(
- static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
- type, AsmParser->hasInv2PiInlineImm());
+ static_cast<uint16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), type,
+ AsmParser->hasInv2PiInlineImm());
}
// Check if single precision literal is inlinable
@@ -2315,6 +2334,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
// checked earlier in isLiteralImm()
uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+ if (OpTy == AMDGPU::OPERAND_REG_IMM_INT16 ||
+ OpTy == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
+ OpTy == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
+ ImmVal = ImmVal & 0xffff;
Inst.addOperand(MCOperand::createImm(ImmVal));
if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
setImmKindMandatoryLiteral();
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
index 1cfafebe2c3cd4..8b685805fb3ed9 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
@@ -11189,10 +11189,10 @@ v_cvt_f16_u16_e32 v5, -1
// GFX10: encoding: [0xc1,0xa0,0x0a,0x7e]
v_cvt_f16_u16_e32 v5, 0.5
-// GFX10: encoding: [0xff,0xa0,0x0a,0x7e,0x00,0x38,0x00,0x00]
+// GFX10: encoding: [0x80,0xa0,0x0a,0x7e]
v_cvt_f16_u16_e32 v5, -4.0
-// GFX10: encoding: [0xff,0xa0,0x0a,0x7e,0x00,0xc4,0x00,0x00]
+// GFX10: encoding: [0x80,0xa0,0x0a,0x7e]
v_cvt_f16_u16_e32 v5, 0xfe0b
// GFX10: encoding: [0xff,0xa0,0x0a,0x7e,0x0b,0xfe,0x00,0x00]
@@ -11237,10 +11237,10 @@ v_cvt_f16_u16_e64 v5, -1
// GFX10: encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00]
v_cvt_f16_u16_e64 v5, 0.5
-// GFX10: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
+// GFX10: encoding: [0x05,0x00,0xd0,0xd5,0x80,0x00,0x00,0x00]
v_cvt_f16_u16_e64 v5, -4.0
-// GFX10: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x00,0x00,0x00,0x00,0xc4,0x00,0x00]
+// GFX10: encoding: [0x05,0x00,0xd0,0xd5,0x80,0x00,0x00,0x00]
v_cvt_f16_u16_e64 v5, v1 clamp
// GFX10: encoding: [0x05,0x80,0xd0,0xd5,0x01,0x01,0x00,0x00]
@@ -11435,10 +11435,10 @@ v_cvt_f16_i16_e32 v5, -1
// GFX10: encoding: [0xc1,0xa2,0x0a,0x7e]
v_cvt_f16_i16_e32 v5, 0.5
-// GFX10: encoding: [0xff,0xa2,0x0a,0x7e,0x00,0x38,0x00,0x00]
+// GFX10: encoding: [0x80,0xa2,0x0a,0x7e]
v_cvt_f16_i16_e32 v5, -4.0
-// GFX10: encoding: [0xff,0xa2,0x0a,0x7e,0x00,0xc4,0x00,0x00]
+// GFX10: encoding: [0x80,0xa2,0x0a,0x7e]
v_cvt_f16_i16_e32 v5, 0xfe0b
// GFX10: encoding: [0xff,0xa2,0x0a,0x7e,0x0b,0xfe,0x00,0x00]
@@ -11483,10 +11483,10 @@ v_cvt_f16_i16_e64 v5, -1
// GFX10: encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00]
v_cvt_f16_i16_e64 v5, 0.5
-// GFX10: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00]
+// GFX10: encoding: [0x05,0x00,0xd1,0xd5,0x80,0x00,0x00,0x00]
v_cvt_f16_i16_e64 v5, -4.0
-// GFX10: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x00,0x00,0x00,0x00,0xc4,0x00,0x00]
+// GFX10: encoding: [0x05,0x00,0xd1,0xd5,0x80,0x00,0x00,0x00]
v_cvt_f16_i16_e64 v5, v1 clamp
// GFX10: encoding: [0x05,0x80,0xd1,0xd5,0x01,0x01,0x00,0x00]
More information about the llvm-commits
mailing list