[llvm] [AMDGPU] Prevent folding of the negative i32 literals as i64 (PR #70274)

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 26 12:37:04 PDT 2023


https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/70274

>From b73fbe5683c701b5dde72cd1fc55182cf16457c5 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Wed, 25 Oct 2023 17:45:09 -0700
Subject: [PATCH 1/4] [AMDGPU] Prevent folding of the negative i32 literals as
 i64

We can use sign-extended 64-bit literals, but only for signed
operands. At the moment we do not know if an operand is signed.
Such an operand is encoded as its low 32 bits, which the hardware
will then either correctly sign-extend or incorrectly zero-extend.
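
A minimal standalone C++ sketch (illustrative only, not part of the
patch) of the extension mismatch, using the constant from the new test:

  #include <cassert>
  #include <cstdint>

  int main() {
    // 0xffffffff80000000 is negative and fits in the signed 32-bit range.
    int64_t Imm = -2147483648LL;
    // A 32-bit literal encodes only the low 32 bits of the value.
    uint32_t Lo = static_cast<uint32_t>(Imm);   // 0x80000000
    int64_t SExt = static_cast<int32_t>(Lo);    // if HW sign-extends the literal
    int64_t ZExt = static_cast<int64_t>(Lo);    // if HW zero-extends the literal
    assert(SExt == Imm);              // correct: 0xffffffff80000000
    assert(ZExt == 0x80000000LL);     // wrong:   0x0000000080000000 != Imm
    return 0;
  }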
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  9 +++++++++
 .../CodeGen/AMDGPU/folding-of-i32-as-i64.mir  | 20 +++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 827c2c156638468..355805e053f38df 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5500,6 +5500,15 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
     if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
         !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
       return false;
+
+    // FIXME: We can use sign-extended 64-bit literals, but only for signed
+    //        operands. At the moment we do not know if an operand is signed.
+    //        Such an operand is encoded as its low 32 bits, which HW will
+    //        then either correctly sign-extend or incorrectly zero-extend.
+    if (Is64BitOp && !Is64BitFPOp && isInt<32>(Imm) &&
+        (int32_t)Lo_32(Imm) < 0 &&
+        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
+      return false;
   }
 
   // Handle non-register types that are treated like immediates.
diff --git a/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
new file mode 100644
index 000000000000000..7cfa67d86fbd94e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
@@ -0,0 +1,20 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
+
+# The constant is 0xffffffff80000000. It is a 64-bit negative constant, but it passes the
+# isInt<32>() test. Nonetheless, it is not a legal literal for a binary or unsigned operand
+# and cannot be folded directly into the shift, as HW will zero-extend it.
+
+---
+name:            imm64_shift_int32_const
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_shift_int32_const
+    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -2147483648
+    ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744071562067968
+    %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...

>From 9eaa2673c53019817f53fc8580dac3798ab17f30 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Thu, 26 Oct 2023 11:21:09 -0700
Subject: [PATCH 2/4] Fold common condition in checks.

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 355805e053f38df..bc8ba2cdabbb924 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5497,18 +5497,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
                      OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
                      OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
-    if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
-        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
-      return false;
+    if (Is64BitOp &&
+        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
+      if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
+        return false;
 
-    // FIXME: We can use sign-extended 64-bit literals, but only for signed
-    //        operands. At the moment we do not know if an operand is signed.
-    //        Such an operand is encoded as its low 32 bits, which HW will
-    //        then either correctly sign-extend or incorrectly zero-extend.
-    if (Is64BitOp && !Is64BitFPOp && isInt<32>(Imm) &&
-        (int32_t)Lo_32(Imm) < 0 &&
-        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
-      return false;
+      // FIXME: We can use sign-extended 64-bit literals, but only for signed
+      //        operands. At the moment we do not know if an operand is signed.
+      //        Such an operand is encoded as its low 32 bits, which HW will
+      //        then either correctly sign-extend or incorrectly zero-extend.
+      if (!Is64BitFPOp && isInt<32>(Imm) && (int32_t)Lo_32(Imm) < 0)
+        return false;
+    }
   }
 
   // Handle non-register types that are treated like immediates.

>From 71e5a19fb3b8a1f20f30ee15567e8748a54b947a Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Thu, 26 Oct 2023 11:39:58 -0700
Subject: [PATCH 3/4] Added more tests, positive and negative

---
 .../CodeGen/AMDGPU/folding-of-i32-as-i64.mir  | 70 ++++++++++++++++++-
 1 file changed, 68 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
index 7cfa67d86fbd94e..ef3f14f75649e9f 100644
--- a/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
+++ b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
@@ -6,10 +6,10 @@
 # and cannot be folded directly into the shift, as HW will zero-extend it.
 
 ---
-name:            imm64_shift_int32_const
+name:            imm64_shift_int32_const_0xffffffff80000000
 body: |
   bb.0:
-    ; GCN-LABEL: name: imm64_shift_int32_const
+    ; GCN-LABEL: name: imm64_shift_int32_const_0xffffffff80000000
     ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -2147483648
     ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
     ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
@@ -18,3 +18,69 @@ body: |
     S_ENDPGM 0, implicit %1
 
 ...
+
+---
+name:            imm64_shift_int32_const_0xffffffff
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_shift_int32_const_0xffffffff
+    ; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 4294967295, 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+    %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name:            imm64_shift_int32_const_0x80000000
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_shift_int32_const_0x80000000
+    ; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 2147483648, 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483648
+    %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name:            imm64_shift_int32_const_0x7fffffff
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_shift_int32_const_0x7fffffff
+    ; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 2147483647, 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483647
+    %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name:            imm64_shift_int32_const_0x1ffffffff
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_shift_int32_const_0x1ffffffff
+    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 8589934591
+    ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 8589934591
+    %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name:            imm64_shift_int32_const_0xffffffffffffffff
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_shift_int32_const_0xffffffffffffffff
+    ; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 -1, 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+    %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...

>From 073baf814ccc535a6feb98c49aa1794813085f82 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Thu, 26 Oct 2023 12:36:31 -0700
Subject: [PATCH 4/4] Removed isInt<32>() check and added tests

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  2 +-
 .../AMDGPU/fold-short-64-bit-literals.mir     |  6 ++-
 .../CodeGen/AMDGPU/folding-of-i32-as-i64.mir  | 46 ++++++++++++++++++-
 3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index bc8ba2cdabbb924..284943eae46500d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5506,7 +5506,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
       //        operands. At the moment we do not know if an operand is signed.
       //        Such an operand is encoded as its low 32 bits, which HW will
       //        then either correctly sign-extend or incorrectly zero-extend.
-      if (!Is64BitFPOp && isInt<32>(Imm) && (int32_t)Lo_32(Imm) < 0)
+      if (!Is64BitFPOp && (int32_t)Lo_32(Imm) < 0)
         return false;
     }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
index 6e975c8a5370758..69c6a858162f39d 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
@@ -84,6 +84,9 @@ body:             |
     SI_RETURN_TO_EPILOG %2
 ...
 
+# FIXME: This could be folded, but we do not know if the operand of S_AND_B64 is signed
+#        or unsigned and whether it will be sign- or zero-extended.
+
 ---
 name:            fold_uint_32bit_literal_sgpr
 tracksRegLiveness: true
@@ -92,7 +95,8 @@ body:             |
 
     ; GCN-LABEL: name: fold_uint_32bit_literal_sgpr
     ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
-    ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], 4294967295, implicit-def $scc
+    ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4294967295
+    ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], [[S_MOV_B64_]], implicit-def $scc
     ; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
     %0:sreg_64 = IMPLICIT_DEF
     %1:sreg_64 = S_MOV_B64 4294967295
diff --git a/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
index ef3f14f75649e9f..30cb88d5789fd33 100644
--- a/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
+++ b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
@@ -24,7 +24,8 @@ name:            imm64_shift_int32_const_0xffffffff
 body: |
   bb.0:
     ; GCN-LABEL: name: imm64_shift_int32_const_0xffffffff
-    ; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 4294967295, 1, implicit-def $scc
+    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+    ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
     ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
     %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
@@ -37,7 +38,8 @@ name:            imm64_shift_int32_const_0x80000000
 body: |
   bb.0:
     ; GCN-LABEL: name: imm64_shift_int32_const_0x80000000
-    ; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 2147483648, 1, implicit-def $scc
+    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483648
+    ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
     ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483648
     %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
@@ -84,3 +86,43 @@ body: |
     S_ENDPGM 0, implicit %1
 
 ...
+
+---
+name:            imm64_ashr_int32_const_0xffffffff
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_ashr_int32_const_0xffffffff
+    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+    ; GCN-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[S_MOV_B]], 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+    %1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name:            imm64_ashr_int32_const_0x7fffffff
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_ashr_int32_const_0x7fffffff
+    ; GCN: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 2147483647, 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483647
+    %1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name:            imm64_ashr_int32_const_0xffffffffffffffff
+body: |
+  bb.0:
+    ; GCN-LABEL: name: imm64_ashr_int32_const_0xffffffffffffffff
+    ; GCN: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 -1, 1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
+    %0:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+    %1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc
+    S_ENDPGM 0, implicit %1
+
+...


