[llvm] [AMDGPU] Prevent folding of the negative i32 literals as i64 (PR #70274)

Thu Oct 26 11:21:50 PDT 2023

https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/70274

>From b73fbe5683c701b5dde72cd1fc55182cf16457c5 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Wed, 25 Oct 2023 17:45:09 -0700
Subject: [PATCH 1/2] [AMDGPU] Prevent folding of the negative i32 literals as
 i64

We can use sign extended 64-bit literals, but only for signed
operands. At the moment we do not know if an operand is signed.
Such operand will be encoded as its low 32 bits and then either
correctly sign extended or incorrectly zero extended by HW.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  9 +++++++++
 .../CodeGen/AMDGPU/folding-of-i32-as-i64.mir  | 20 +++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 827c2c156638468..355805e053f38df 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5500,6 +5500,15 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
     if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
         !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
       return false;
+
+    // FIXME: We can use sign extended 64-bit literals, but only for signed
+    //        operands. At the moment we do not know if an operand is signed.
+    //        Such operand will be encoded as its low 32 bits and then either
+    //        correctly sign extended or incorrectly zero extended by HW.
+    if (Is64BitOp && !Is64BitFPOp && isInt<32>(Imm) &&
+        (int32_t)Lo_32(Imm) < 0 &&
+        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
+      return false;
   }
 
   // Handle non-register types that are treated like immediates.
diff --git a/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
new file mode 100644
index 000000000000000..7cfa67d86fbd94e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
@@ -0,0 +1,20 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
+
+# The constant is 0xffffffff80000000. It is 64-bit negative constant, but it passes the test
+# isInt<32>(). Nonetheless it is not a legal literal for a binary or unsigned operand and
+# cannot be used right in the shift as HW will zero extend it.
+
+---
+name:            imm64_shift_int32_const
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_shift_int32_const
+    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -2147483648
+    ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744071562067968
+    %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...

>From 9eaa2673c53019817f53fc8580dac3798ab17f30 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Thu, 26 Oct 2023 11:21:09 -0700
Subject: [PATCH 2/2] Fold common condition in checks.

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 355805e053f38df..bc8ba2cdabbb924 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5497,18 +5497,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
                      OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
                      OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
-    if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
-        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
-      return false;
+    if (Is64BitOp &&
+        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
+      if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
+        return false;
 
-    // FIXME: We can use sign extended 64-bit literals, but only for signed
-    //        operands. At the moment we do not know if an operand is signed.
-    //        Such operand will be encoded as its low 32 bits and then either
-    //        correctly sign extended or incorrectly zero extended by HW.
-    if (Is64BitOp && !Is64BitFPOp && isInt<32>(Imm) &&
-        (int32_t)Lo_32(Imm) < 0 &&
-        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
-      return false;
+      // FIXME: We can use sign extended 64-bit literals, but only for signed
+      //        operands. At the moment we do not know if an operand is signed.
+      //        Such operand will be encoded as its low 32 bits and then either
+      //        correctly sign extended or incorrectly zero extended by HW.
+      if (!Is64BitFPOp && isInt<32>(Imm) && (int32_t)Lo_32(Imm) < 0)
+        return false;
+    }
   }
 
   // Handle non-register types that are treated like immediates.