[llvm] [WIP][AMDGPU] Split `isInlinableLiteral16` into three and call the specific version if possible (PR #81345)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 12 11:25:29 PST 2024
================
@@ -2655,6 +2655,40 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3118; // 1/2pi
}
+bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
+ if (!HasInv2Pi)
+ return false;
+ if (isInlinableIntLiteral(Literal))
+ return true;
+ uint16_t Val = static_cast<uint16_t>(Literal);
+ return Val == 0x3C00 || // 1.0
+ Val == 0xBC00 || // -1.0
+ Val == 0x3800 || // 0.5
+ Val == 0xB800 || // -0.5
+ Val == 0x4000 || // 2.0
+ Val == 0xC000 || // -2.0
+ Val == 0x4400 || // 4.0
+ Val == 0xC400 || // -4.0
+ Val == 0x3118; // 1/2pi
+}
+
+bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
+ if (!HasInv2Pi)
+ return false;
+ if (isInlinableIntLiteral(Literal))
+ return true;
+ uint16_t Val = static_cast<uint16_t>(Literal);
+ return Val == 0x3F00 || // 0.5
+ Val == 0xBF00 || // -0.5
+ Val == 0x3F80 || // 1.0
+ Val == 0xBF80 || // -1.0
+ Val == 0x4000 || // 2.0
+ Val == 0xC000 || // -2.0
+ Val == 0x4080 || // 4.0
+ Val == 0xC080 || // -4.0
+ Val == 0x3E22; // 1.0 / (2.0 * pi)
----------------
shiltian wrote:
On p. 608 of the sp3 gfx11 document, the bf16 encoding of 1/(2*pi) is listed as `0x3e22` (truncated from the 32-bit format with no rounding).
https://github.com/llvm/llvm-project/pull/81345
More information about the llvm-commits
mailing list