[llvm] [WIP][AMDGPU] Split `isInlinableLiteral16` into three and call the specific version if possible (PR #81345)

Sun Feb 11 20:07:05 PST 2024

https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/81345

>From f14cb53cf91fe42f9d5f0646d6ce39c23d69f7b0 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sun, 11 Feb 2024 23:06:54 -0500
Subject: [PATCH 1/2] [AMDGPU] Remove unused functions for checking 16-bit
 inline literals

This patch removes unused functions that check if an immediate is a 16-bit inline
literals. This serves as prime patches to fix #79369.
---
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp       |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h         | 13 +++----------
 .../lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 12 ++----------
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h  |  4 +---
 llvm/lib/Target/AMDGPU/SIInstrInfo.td               | 10 +---------
 5 files changed, 8 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5278b552a65514..024adcda0fa061 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -327,7 +327,7 @@ bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
     return TII->isInlineConstant(C->getAPIntValue());
 
   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
-    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+    return TII->isInlineConstant(C->getValueAPF());
 
   return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 3b42d88df0c246..f987b747c0e21b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -105,18 +105,11 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
 
 private:
   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
-  bool isInlineImmediate(const SDNode *N) const;
-
-  bool isInlineImmediate16(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
-  }
 
-  bool isInlineImmediate32(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
-  }
+  bool isInlineImmediate(const SDNode *N) const;
 
-  bool isInlineImmediate64(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
+  bool isInlineImmediate(const APInt &Imm) const {
+    return Subtarget->getInstrInfo()->isInlineConstant(Imm);
   }
 
   bool isInlineImmediate(const APFloat &Imm) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5657880279962b..14879e78175087 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5746,16 +5746,8 @@ void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
   MIB.addImm(ExpVal);
 }
 
-bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
-  return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm());
-}
-
-bool AMDGPUInstructionSelector::isInlineImmediate32(int64_t Imm) const {
-  return AMDGPU::isInlinableLiteral32(Imm, STI.hasInv2PiInlineImm());
-}
-
-bool AMDGPUInstructionSelector::isInlineImmediate64(int64_t Imm) const {
-  return AMDGPU::isInlinableLiteral64(Imm, STI.hasInv2PiInlineImm());
+bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
+  return TII.isInlineConstant(Imm);
 }
 
 bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index ef7630f137aca6..f561d5d29efc43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -353,9 +353,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
   void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI,
                               int OpIdx) const;
 
-  bool isInlineImmediate16(int64_t Imm) const;
-  bool isInlineImmediate32(int64_t Imm) const;
-  bool isInlineImmediate64(int64_t Imm) const;
+  bool isInlineImmediate(const APInt &Imm) const;
   bool isInlineImmediate(const APFloat &Imm) const;
 
   // Returns true if TargetOpcode::G_AND MachineInstr `MI`'s masking of the
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 22599773d562cb..71cae75feec226 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -797,16 +797,8 @@ def i64imm_32bit : ImmLeaf<i64, [{
   return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
 }]>;
 
-def InlineImm16 : ImmLeaf<i16, [{
-  return isInlineImmediate16(Imm);
-}]>;
-
-def InlineImm32 : ImmLeaf<i32, [{
-  return isInlineImmediate32(Imm);
-}]>;
-
 def InlineImm64 : ImmLeaf<i64, [{
-  return isInlineImmediate64(Imm);
+  return isInlineImmediate(APInt(64, static_cast<uint64_t>(Imm)));
 }]>;
 
 def InlineImmFP32 : FPImmLeaf<f32, [{

>From e47996c2b8677327cb7047d7b3e00124a3083192 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sun, 11 Feb 2024 23:06:54 -0500
Subject: [PATCH 2/2] [WIP][AMDGPU] Split `isInlinableLiteral16` into three and
 call the specific version if possible

The current implementation of `isInlinableLiteral16` assumes, a 16-bit inlinable
literal is either an i16 or a fp16. This is not always true because of bf16.
However, we can't tell fp16 and bf16 apart by just looking at the value. This
patch tries to split `isInlinableLiteral16` into three versions, i16, fp16, bf16
respectively, and call the corresponding version.

This patch is based on #81282. The current status is, only two uses of original
`isInlinableLiteral16` are still there. We need to add an extra argument to indicate
the type of the operand the immediate corresponds to. This will also require the
change of the function signature of the two callers.
---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      | 47 ++++++++++++++-----
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 22 ++++++---
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h   |  4 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  2 +-
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 34 ++++++++++++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  6 +++
 6 files changed, 94 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index a94da992b33859..56e1bc25cc6efe 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1927,8 +1927,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
     return isInlinableIntLiteral(Val);
   }
 
-  // f16/v2f16 operands work correctly for all values.
-  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
+  if (VT.getScalarType() == MVT::f16)
+    return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
+
+  assert(VT.getScalarType() == MVT::bf16);
+
+  return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
 }
 
 bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2277,15 +2281,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     return;
 
   case AMDGPU::OPERAND_REG_IMM_INT16:
-  case AMDGPU::OPERAND_REG_IMM_FP16:
-  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
-  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+    if (isSafeTruncation(Val, 16) &&
+        AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
+      Inst.addOperand(MCOperand::createImm(Val));
+      setImmKindConst();
+      return;
+    }
+
+    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+    setImmKindLiteral();
+    return;
+
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
     if (isSafeTruncation(Val, 16) &&
-        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
-                                     AsmParser->hasInv2PiInlineImm())) {
+        AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
+                                       AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       setImmKindConst();
       return;
@@ -2296,12 +2311,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     return;
 
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
+    assert(isSafeTruncation(Val, 16));
+    assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
+    Inst.addOperand(MCOperand::createImm(Val));
+    return;
+  }
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
-  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
     assert(isSafeTruncation(Val, 16));
-    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
-                                        AsmParser->hasInv2PiInlineImm()));
+    assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
+                                          AsmParser->hasInv2PiInlineImm()));
 
     Inst.addOperand(MCOperand::createImm(Val));
     return;
@@ -3434,7 +3454,12 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
       return AMDGPU::isInlinableLiteralV2F16(Val);
 
-    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
+    if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16)
+      return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
+
+    llvm_unreachable("invalid operand type");
   }
   default:
     llvm_unreachable("invalid operand size");
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index abfa4a3531e8e1..08ad962e2607ad 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -462,8 +462,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
 
 // This must accept a 32-bit immediate value to correctly handle packed 16-bit
 // operations.
-static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
-                                  raw_ostream &O) {
+static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
+                               raw_ostream &O) {
   if (Imm == 0x3C00)
     O << "1.0";
   else if (Imm == 0xBC00)
@@ -488,7 +488,7 @@ static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
   return true;
 }
 
-void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
+void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, uint8_t OpType,
                                          const MCSubtargetInfo &STI,
                                          raw_ostream &O) {
   int16_t SImm = static_cast<int16_t>(Imm);
@@ -498,8 +498,16 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
   }
 
   uint16_t HImm = static_cast<uint16_t>(Imm);
-  if (printImmediateFloat16(HImm, STI, O))
-    return;
+  switch (OpType) {
+  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+    if (printImmediateFP16(HImm, STI, O))
+      return;
+    break;
+  default:
+    llvm_unreachable("bad operand type");
+  }
 
   uint64_t Imm16 = static_cast<uint16_t>(Imm);
   O << formatHex(Imm16);
@@ -525,7 +533,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
     if (isUInt<16>(Imm) &&
-        printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
+        printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
       return;
     break;
   default:
@@ -796,7 +804,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
     case AMDGPU::OPERAND_REG_IMM_FP16:
     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
-      printImmediate16(Op.getImm(), STI, O);
+      printImmediate16(Op.getImm(), OpTy, STI, O);
       break;
     case AMDGPU::OPERAND_REG_IMM_V2INT16:
     case AMDGPU::OPERAND_REG_IMM_V2FP16:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index e91ff86b219a0c..d14a36e77b6612 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -86,8 +86,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
                       raw_ostream &O);
   void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
                            raw_ostream &O);
-  void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
+  void printImmediate16(uint32_t Imm, uint8_t OpType,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
   void printImmediateV216(uint32_t Imm, uint8_t OpType,
                           const MCSubtargetInfo &STI, raw_ostream &O);
   bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c7628bd354309c..ae6f99ffc8620e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4196,7 +4196,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
       // constants in these cases
       int16_t Trunc = static_cast<int16_t>(Imm);
       return ST.has16BitInsts() &&
-             AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
+             AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
     }
 
     return false;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 33335ac75df768..8059d30fd07ac9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2655,6 +2655,40 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
          Val == 0x3118;   // 1/2pi
 }
 
+bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
+  if (!HasInv2Pi)
+    return false;
+  if (isInlinableIntLiteral(Literal))
+    return true;
+  uint16_t Val = static_cast<uint16_t>(Literal);
+  return Val == 0x3C00 || // 1.0
+         Val == 0xBC00 || // -1.0
+         Val == 0x3800 || // 0.5
+         Val == 0xB800 || // -0.5
+         Val == 0x4000 || // 2.0
+         Val == 0xC000 || // -2.0
+         Val == 0x4400 || // 4.0
+         Val == 0xC400 || // -4.0
+         Val == 0x3118;   // 1/2pi
+}
+
+bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
+  if (!HasInv2Pi)
+    return false;
+  if (isInlinableIntLiteral(Literal))
+    return true;
+  uint16_t Val = static_cast<uint16_t>(Literal);
+  return Val == 0x3F00 || // 0.5
+         Val == 0xBF00 || // -0.5
+         Val == 0x3F80 || // 1.0
+         Val == 0xBF80 || // -1.0
+         Val == 0x4000 || // 2.0
+         Val == 0xC000 || // -2.0
+         Val == 0x4080 || // 4.0
+         Val == 0xC080 || // -4.0
+         Val == 0x3E22;   // 1.0 / (2.0 * pi)
+}
+
 std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
   // Unfortunately, the Instruction Set Architecture Reference Guide is
   // misleading about how the inline operands work for (packed) 16-bit
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f24b9f0e3615de..65ff10471d089c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1359,6 +1359,12 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
 LLVM_READNONE
 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
 
+LLVM_READNONE
+bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
+
 LLVM_READNONE
 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);