[llvm] [GlobalISel] Constant-fold G_PTR_ADD with different type sizes (PR #81473)

Mon Feb 12 04:45:32 PST 2024

https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/81473

>From 9f2996b318a107ed9c050b59b5f234e3ec9c3fe8 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 12 Feb 2024 13:17:11 +0100
Subject: [PATCH 1/3] [GlobalISel] Constant-fold G_PTR_ADD with different type
 sizes

All other opcodes in the list are constrained to have the same type on both operands, but G_PTR_ADD is not.

Fixes #81464
---
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |  9 ++++++++-
 .../combine-extract-vector-load.mir           | 20 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir

diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 26fd12f9e51c43..d693316dc6e9d7 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -660,8 +660,15 @@ std::optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode,
   default:
     break;
   case TargetOpcode::G_ADD:
-  case TargetOpcode::G_PTR_ADD:
     return C1 + C2;
+  case TargetOpcode::G_PTR_ADD: {
+    // Types can be of different width here.
+    if (C1.getBitWidth() < C2.getBitWidth())
+      return C1.zext(C1.getBitWidth()) + C2;
+    if (C1.getBitWidth() > C2.getBitWidth())
+      return C2.zext(C1.getBitWidth()) + C1;
+    return C1 + C2;
+  }
   case TargetOpcode::G_AND:
     return C1 & C2;
   case TargetOpcode::G_ASHR:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
new file mode 100644
index 00000000000000..13be65612fa855
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
@@ -0,0 +1,20 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            test_ptradd_crash
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_ptradd_crash
+    ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
+    ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
+    ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %1:_(p1) = G_CONSTANT i64 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<4 x s32>) = G_LOAD %1 :: (load (<4 x s32>) from `ptr addrspace(1) null`, addrspace 1)
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    $sgpr0 = COPY %2
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+...

>From 392468e2beffe4cb47e7c99922ddf8a6ce1a72da Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 12 Feb 2024 13:29:25 +0100
Subject: [PATCH 2/3] fix zext

---
 llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index d693316dc6e9d7..6c11e999625d85 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -664,7 +664,7 @@ std::optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode,
   case TargetOpcode::G_PTR_ADD: {
     // Types can be of different width here.
     if (C1.getBitWidth() < C2.getBitWidth())
-      return C1.zext(C1.getBitWidth()) + C2;
+      return C1.zext(C2.getBitWidth()) + C2;
     if (C1.getBitWidth() > C2.getBitWidth())
       return C2.zext(C1.getBitWidth()) + C1;
     return C1 + C2;

>From 1c2683a617529c8a13feae02c4f57d76ed4da1f8 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 12 Feb 2024 13:45:20 +0100
Subject: [PATCH 3/3] fix case where C2 > C1

---
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |  6 ++--
 .../combine-extract-vector-load.mir           | 28 ++++++++++++++++---
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 6c11e999625d85..7da6eb96ecbf1a 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -663,10 +663,12 @@ std::optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode,
     return C1 + C2;
   case TargetOpcode::G_PTR_ADD: {
     // Types can be of different width here.
-    if (C1.getBitWidth() < C2.getBitWidth())
-      return C1.zext(C2.getBitWidth()) + C2;
     if (C1.getBitWidth() > C2.getBitWidth())
       return C2.zext(C1.getBitWidth()) + C1;
+    // We always need to return something the same size as C1, so
+    // truncate in this case.
+    if (C1.getBitWidth() < C2.getBitWidth())
+      return C1 + C2.trunc(C1.getBitWidth());
     return C1 + C2;
   }
   case TargetOpcode::G_AND:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
index 13be65612fa855..aa72a9ec06ede5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir
@@ -1,18 +1,38 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
 
+# Tries to emit a foldable G_PTR_ADD with (p1, s32) operands.
 ---
-name:            test_ptradd_crash
+name:            test_ptradd_crash__offset_smaller
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; CHECK-LABEL: name: test_ptradd_crash
-    ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+    ; CHECK-LABEL: name: test_ptradd_crash__offset_smaller
+    ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
     ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
     ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
     ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
     %1:_(p1) = G_CONSTANT i64 0
-    %3:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_CONSTANT i32 3
+    %0:_(<4 x s32>) = G_LOAD %1 :: (load (<4 x s32>) from `ptr addrspace(1) null`, addrspace 1)
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    $sgpr0 = COPY %2
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+...
+
+# Tries to emit a foldable G_PTR_ADD with (p1, s128) operands.
+---
+name:            test_ptradd_crash__offset_wider
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_ptradd_crash__offset_wider
+    ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
+    ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
+    ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %1:_(p1) = G_CONSTANT i64 0
+    %3:_(s128) = G_CONSTANT i128 3
     %0:_(<4 x s32>) = G_LOAD %1 :: (load (<4 x s32>) from `ptr addrspace(1) null`, addrspace 1)
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3
     $sgpr0 = COPY %2