[llvm] [AMDGPU] Fix wrong operand value when floating-point value is used as operand of type i16 (PR #84106)

Tue Mar 5 18:56:46 PST 2024

https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/84106

Based on the section "OPF_INV2PI_16" of the spec, when a floating-point value is
used as operand of type `i16`, the 32-bit representation of the constant
truncated to the 16 LSBs should be used. Currently we directly use the FP16
representation, which doesn't conform with the spec.

For example, when `0.5` is used, for now we take it as `0x3800` because that is
the encoding of `<half 0.5>`. Instead, it should be `0x3f000000` truncated to 16
LSB, which is `0x0000`.

>From a2eb5177cf034134c1969721862729f596ca66e6 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 5 Mar 2024 21:50:56 -0500
Subject: [PATCH] [AMDGPU] Fix wrong operand value when floating-point value is
 used as operand of type i16

Based on the section "OPF_INV2PI_16" of the spec, when a floating-point value is
used as operand of type `i16`, the 32-bit representation of the constant
truncated to the 16 LSBs should be used. Currently we directly use the FP16
representation, which doesn't conform with the spec.

For example, when `0.5` is used, for now we take it as `0x3800` because that is
the encoding of `<half 0.5>`. Instead, it should be `0x3f000000` truncated to 16
LSB, which is `0x0000`.
---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      | 33 ++++++++++++++++---
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 10999d846e3bb2..e2203acf1e9795 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1926,6 +1926,11 @@ static const fltSemantics *getFltSemantics(MVT VT) {
 
 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   switch (OperandType) {
+  // When floating-point immediate is used as operand of type i16, the 32-bit
+  // representation of the constant truncated to the 16 LSBs should be used.
+  case AMDGPU::OPERAND_REG_IMM_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
@@ -1949,13 +1954,10 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
     return &APFloat::IEEEdouble();
-  case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
-  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
-  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
@@ -2045,9 +2047,26 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
       return false;
 
     if (type.getScalarSizeInBits() == 16) {
+      bool Lost;
+      switch (type.getScalarType().SimpleTy) {
+      default:
+        llvm_unreachable("unknown 16-bit type");
+      case MVT::bf16:
+        FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
+                          &Lost);
+        break;
+      case MVT::f16:
+        FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
+                          &Lost);
+        break;
+      case MVT::i16:
+        FPLiteral.convert(APFloatBase::IEEEsingle(),
+                          APFloat::rmNearestTiesToEven, &Lost);
+        break;
+      }
       return isInlineableLiteralOp16(
-        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
-        type, AsmParser->hasInv2PiInlineImm());
+          static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), type,
+          AsmParser->hasInv2PiInlineImm());
     }
 
     // Check if single precision literal is inlinable
@@ -2315,6 +2334,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
       // checked earlier in isLiteralImm()
 
       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+      if (OpTy == AMDGPU::OPERAND_REG_IMM_INT16 OpTy ==
+          AMDGPU::OPERAND_REG_INLINE_C_INT16 OpTy ==
+          AMDGPU::OPERAND_REG_INLINE_AC_INT16)
+        ImmVal = ImmVal & 0xffff;
       Inst.addOperand(MCOperand::createImm(ImmVal));
       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
         setImmKindMandatoryLiteral();