[llvm] [InstCombine] Try to fold add into GEP x, C (PR #85090)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 18 01:01:01 PDT 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/85090

>From 74c69fddf6b51703414cc142571d22ad786ad1ce Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 13 Mar 2024 11:54:17 +0000
Subject: [PATCH 1/3] [InstCombine] Add Gep + Add tests. NFC

---
 llvm/test/Transforms/InstCombine/gepadd.ll | 115 +++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/gepadd.ll

diff --git a/llvm/test/Transforms/InstCombine/gepadd.ll b/llvm/test/Transforms/InstCombine/gepadd.ll
new file mode 100644
index 00000000000000..328b0d0de0883e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gepadd.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+define ptr @add1(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @add1(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @sub1(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @sub1(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = sub i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @add10(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @add10(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 10
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @nooff(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @nooff(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 10
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 %n
+  ret ptr %g
+}
+
+define ptr @inbounds(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @inbounds(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr inbounds [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+%struct.Struct = type { i32, i32, ptr }
+define ptr @struct(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @struct(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [[STRUCT_STRUCT:%.*]], ptr [[P]], i64 [[A]], i32 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, -1
+  %g = getelementptr %struct.Struct, ptr %p, i64 %a, i32 1
+  ret ptr %g
+}
+
+define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @lessargs(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x i32]]], ptr %p, i64 0, i64 %a, i64 1
+  ret ptr %g
+}
+
+define ptr @twice(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @twice(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[B:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 [[B]], i64 -3
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %b = sub i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr %p, i64 0, i64 %a, i64 1, i64 %b, i64 -3
+  ret ptr %g
+}
+
+define ptr @simpler(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @simpler(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 8
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 8
+  %g = getelementptr [16 x [8 x i32]], ptr %p, i64 0, i64 %a, i64 1
+  ret ptr %g
+}

>From 90b032dadd6ac34714040a06573265c0162ea052 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 13 Mar 2024 14:37:06 +0000
Subject: [PATCH 2/3] [InstCombine] Try to fold add into GEP

This attempts to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S, where S
is the element stride of the first index divided by that of the second,
removing the need for the add by folding it into the following constant offset
in the GEP. It does not attempt to alter inbounds GEPs, to avoid the need to
drop the inbounds flag.

Example proof using a much reduced pointer address space:
https://alive2.llvm.org/ce/z/hG8xfF
---
 .../InstCombine/InstructionCombining.cpp      | 31 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/gepadd.ll    | 19 ++++--------
 2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5d3e41fb5f29c4..bebc3207f13150 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2938,6 +2938,37 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         }
       }
     }
+
+    // Try to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S
+    if (GEP.getNumIndices() > 1) {
+      gep_type_iterator GTI = gep_type_begin(GEP);
+      for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end() - 1;
+           I != E; ++I, ++GTI) {
+        if (!GTI.isSequential())
+          break;
+        Value *X;
+        const APInt *C1, *C2;
+        User::op_iterator Next = std::next(I);
+        if (match(I->get(), m_Add(m_Value(X), m_APInt(C1))) &&
+            match(Next->get(), m_APInt(C2))) {
+          TypeSize Scale1 = GTI.getSequentialElementStride(DL);
+          if (Scale1.isScalable() || !(++GTI).isSequential())
+            break;
+          TypeSize Scale2 = GTI.getSequentialElementStride(DL);
+          if (Scale2.isScalable())
+            break;
+
+          // Update the GEP instruction indices, and add Add to the worklist
+          // so that it can be DCEd.
+          Instruction *Add = cast<Instruction>(*I);
+          *I = X;
+          *Next = ConstantInt::get((*Next)->getType(),
+                                    *C2 + *C1 * (Scale1 / Scale2));
+          addToWorklist(Add);
+          return &GEP;
+        }
+      }
+    }
   }
 
   if (Instruction *R = foldSelectGEP(GEP, Builder))
diff --git a/llvm/test/Transforms/InstCombine/gepadd.ll b/llvm/test/Transforms/InstCombine/gepadd.ll
index 328b0d0de0883e..895bb94ca99576 100644
--- a/llvm/test/Transforms/InstCombine/gepadd.ll
+++ b/llvm/test/Transforms/InstCombine/gepadd.ll
@@ -6,8 +6,7 @@ target datalayout = "e-p:64:64:64"
 define ptr @add1(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @add1(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 11, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -18,8 +17,7 @@ define ptr @add1(ptr %p, i64 %o, i64 %n) {
 define ptr @sub1(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @sub1(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 -9, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = sub i64 %o, 1
@@ -30,8 +28,7 @@ define ptr @sub1(ptr %p, i64 %o, i64 %n) {
 define ptr @add10(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @add10(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 101, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 10
@@ -79,8 +76,7 @@ define ptr @struct(ptr %p, i64 %o, i64 %n) {
 define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @lessargs(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[O]], i64 11
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -91,9 +87,7 @@ define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
 define ptr @twice(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @twice(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[B:%.*]] = add i64 [[O]], -1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 [[B]], i64 -3
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[O]], i64 11, i64 [[O]], i64 -13
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -105,8 +99,7 @@ define ptr @twice(ptr %p, i64 %o, i64 %n) {
 define ptr @simpler(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @simpler(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 8
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[O]], i64 65
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 8

>From 6c8c91ee61e1df2ac1421ab5f8b44b91d196984e Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 18 Mar 2024 07:56:10 +0000
Subject: [PATCH 3/3] Move to dropping inbounds

---
 .../InstCombine/InstructionCombining.cpp      | 53 ++++++++++---------
 llvm/test/Transforms/InstCombine/gepadd.ll    |  3 +-
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index bebc3207f13150..5d699f4c73427e 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2938,35 +2938,36 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         }
       }
     }
+  }
 
-    // Try to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S
-    if (GEP.getNumIndices() > 1) {
-      gep_type_iterator GTI = gep_type_begin(GEP);
-      for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end() - 1;
-           I != E; ++I, ++GTI) {
-        if (!GTI.isSequential())
+  // Try to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S
+  if (GEP.getNumIndices() > 1) {
+    gep_type_iterator GTI = gep_type_begin(GEP);
+    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end() - 1; I != E;
+         ++I, ++GTI) {
+      if (!GTI.isSequential())
+        break;
+      Value *X;
+      const APInt *C1, *C2;
+      User::op_iterator Next = std::next(I);
+      if (match(I->get(), m_Add(m_Value(X), m_APInt(C1))) &&
+          match(Next->get(), m_APInt(C2))) {
+        TypeSize Scale1 = GTI.getSequentialElementStride(DL);
+        if (Scale1.isScalable() || !(++GTI).isSequential())
+          break;
+        TypeSize Scale2 = GTI.getSequentialElementStride(DL);
+        if (Scale2.isScalable())
           break;
-        Value *X;
-        const APInt *C1, *C2;
-        User::op_iterator Next = std::next(I);
-        if (match(I->get(), m_Add(m_Value(X), m_APInt(C1))) &&
-            match(Next->get(), m_APInt(C2))) {
-          TypeSize Scale1 = GTI.getSequentialElementStride(DL);
-          if (Scale1.isScalable() || !(++GTI).isSequential())
-            break;
-          TypeSize Scale2 = GTI.getSequentialElementStride(DL);
-          if (Scale2.isScalable())
-            break;
 
-          // Update the GEP instruction indices, and add Add to the worklist
-          // so that it can be DCEd.
-          Instruction *Add = cast<Instruction>(*I);
-          *I = X;
-          *Next = ConstantInt::get((*Next)->getType(),
-                                    *C2 + *C1 * (Scale1 / Scale2));
-          addToWorklist(Add);
-          return &GEP;
-        }
+        // Update the GEP instruction indices, and add Add to the worklist
+        // so that it can be DCEd.
+        Instruction *Add = cast<Instruction>(*I);
+        *I = X;
+        *Next =
+            ConstantInt::get((*Next)->getType(), *C2 + *C1 * (Scale1 / Scale2));
+        addToWorklist(Add);
+        GEP.setIsInBounds(false);
+        return &GEP;
       }
     }
   }
diff --git a/llvm/test/Transforms/InstCombine/gepadd.ll b/llvm/test/Transforms/InstCombine/gepadd.ll
index 895bb94ca99576..a22f2b549acdbe 100644
--- a/llvm/test/Transforms/InstCombine/gepadd.ll
+++ b/llvm/test/Transforms/InstCombine/gepadd.ll
@@ -51,8 +51,7 @@ define ptr @nooff(ptr %p, i64 %o, i64 %n) {
 define ptr @inbounds(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @inbounds(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 11, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1



More information about the llvm-commits mailing list