[llvm] [InstCombine] Relax the same-underlying-object constraint for the GEP canonicalization (PR #76583)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 31 07:37:16 PST 2023


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/76583

>From 17833261101533fcb6a4ba71fa02f9e5b36cb4fb Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 30 Dec 2023 15:33:01 +0800
Subject: [PATCH 1/5] [InstCombine] Add pre-commit tests. NFC.

---
 .../Transforms/InstCombine/getelementptr.ll   | 90 +++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 7d67f2583aa24d..18373a9222f8d8 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1537,4 +1537,94 @@ define ptr @gep_ashr_without_exact(ptr %p, i64 %off) {
   ret ptr %ptr
 }
 
+define i1 @test_only_used_by_icmp(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test_only_used_by_icmp(
+; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP]], [[C:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %pa = ptrtoint ptr %a to i64
+  %pb = ptrtoint ptr %b to i64
+  %sub = sub i64 %pb, %pa
+  %gep = getelementptr i8, ptr %a, i64 %sub
+  %cmp = icmp eq ptr %gep, %c
+  ret i1 %cmp
+}
+
+define i64 @test_only_used_by_ptrtoint(ptr %a, ptr %b) {
+; CHECK-LABEL: @test_only_used_by_ptrtoint(
+; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
+; CHECK-NEXT:    [[VAL:%.*]] = ptrtoint ptr [[GEP]] to i64
+; CHECK-NEXT:    ret i64 [[VAL]]
+;
+  %pa = ptrtoint ptr %a to i64
+  %pb = ptrtoint ptr %b to i64
+  %sub = sub i64 %pb, %pa
+  %gep = getelementptr i8, ptr %a, i64 %sub
+  %val = ptrtoint ptr %gep to i64
+  ret i64 %val
+}
+
+define i64 @test_used_by_both(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test_used_by_both(
+; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP]], [[C:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[VAL:%.*]] = ptrtoint ptr [[GEP]] to i64
+; CHECK-NEXT:    ret i64 [[VAL]]
+; CHECK:       if.else:
+; CHECK-NEXT:    ret i64 0
+;
+  %pa = ptrtoint ptr %a to i64
+  %pb = ptrtoint ptr %b to i64
+  %sub = sub i64 %pb, %pa
+  %gep = getelementptr i8, ptr %a, i64 %sub
+  %cmp = icmp eq ptr %gep, %c
+  br i1 %cmp, label %if.then, label %if.else
+if.then:
+  %val = ptrtoint ptr %gep to i64
+  ret i64 %val
+if.else:
+  ret i64 0
+}
+
+; Negative tests
+
+define i64 @test_used_by_both_invalid(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test_used_by_both_invalid(
+; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP]], [[C:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[VAL:%.*]] = load i64, ptr [[GEP]], align 8
+; CHECK-NEXT:    ret i64 [[VAL]]
+; CHECK:       if.else:
+; CHECK-NEXT:    ret i64 0
+;
+  %pa = ptrtoint ptr %a to i64
+  %pb = ptrtoint ptr %b to i64
+  %sub = sub i64 %pb, %pa
+  %gep = getelementptr i8, ptr %a, i64 %sub
+  %cmp = icmp eq ptr %gep, %c
+  br i1 %cmp, label %if.then, label %if.else
+if.then:
+  %val = load i64, ptr %gep, align 8
+  ret i64 %val
+if.else:
+  ret i64 0
+}
+
 !0 = !{!"branch_weights", i32 2, i32 10}

>From eaf4836f014fb295baca4563260da7bf27ff7f51 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 30 Dec 2023 15:34:39 +0800
Subject: [PATCH 2/5] [InstCombine] Relax the same-underlying-object constraint
 for the GEP canonicalization

---
 .../InstCombine/InstructionCombining.cpp      | 11 +++++++---
 .../Transforms/InstCombine/getelementptr.ll   | 20 ++++---------------
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index df393d72a85bf2..5b2b7d84e4717b 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2471,13 +2471,18 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
       if (TyAllocSize == 1) {
         // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
-        // but only if both point to the same underlying object (otherwise
-        // provenance is not necessarily retained).
+        // but only if the result pointer is only used as if it were an integer,
+        // or both point to the same underlying object (otherwise provenance is
+        // not necessarily retained).
         Value *X = GEP.getPointerOperand();
         Value *Y;
         if (match(GEP.getOperand(1),
                   m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
-            getUnderlyingObject(X) == getUnderlyingObject(Y))
+            (all_of(GEP.users(),
+                    [](User *U) {
+                      return isa<ICmpInst>(U) || isa<PtrToIntInst>(U);
+                    }) ||
+             getUnderlyingObject(X) == getUnderlyingObject(Y)))
           return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
       } else {
         // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 18373a9222f8d8..22c918e0d4a3ea 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1539,11 +1539,7 @@ define ptr @gep_ashr_without_exact(ptr %p, i64 %off) {
 
 define i1 @test_only_used_by_icmp(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test_only_used_by_icmp(
-; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
-; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B:%.*]] to i64
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP]], [[C:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %pa = ptrtoint ptr %a to i64
@@ -1556,11 +1552,7 @@ define i1 @test_only_used_by_icmp(ptr %a, ptr %b, ptr %c) {
 
 define i64 @test_only_used_by_ptrtoint(ptr %a, ptr %b) {
 ; CHECK-LABEL: @test_only_used_by_ptrtoint(
-; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
-; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B:%.*]] to i64
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
-; CHECK-NEXT:    [[VAL:%.*]] = ptrtoint ptr [[GEP]] to i64
+; CHECK-NEXT:    [[VAL:%.*]] = ptrtoint ptr [[B:%.*]] to i64
 ; CHECK-NEXT:    ret i64 [[VAL]]
 ;
   %pa = ptrtoint ptr %a to i64
@@ -1573,14 +1565,10 @@ define i64 @test_only_used_by_ptrtoint(ptr %a, ptr %b) {
 
 define i64 @test_used_by_both(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test_used_by_both(
-; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
-; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B:%.*]] to i64
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP]], [[C:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[VAL:%.*]] = ptrtoint ptr [[GEP]] to i64
+; CHECK-NEXT:    [[VAL:%.*]] = ptrtoint ptr [[B]] to i64
 ; CHECK-NEXT:    ret i64 [[VAL]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret i64 0

>From 590215ff99666a00514a9eb40ebe79fa486b161f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 31 Dec 2023 18:07:23 +0800
Subject: [PATCH 3/5] fixup! [InstCombine] Relax the same-underlying-object
 constraint for the GEP canonicalization

---
 .../InstCombine/InstructionCombining.cpp      | 32 +++++++++++++++----
 .../Transforms/InstCombine/getelementptr.ll   |  8 ++---
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5b2b7d84e4717b..33585afec5f802 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2477,13 +2477,31 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         Value *X = GEP.getPointerOperand();
         Value *Y;
         if (match(GEP.getOperand(1),
-                  m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
-            (all_of(GEP.users(),
-                    [](User *U) {
-                      return isa<ICmpInst>(U) || isa<PtrToIntInst>(U);
-                    }) ||
-             getUnderlyingObject(X) == getUnderlyingObject(Y)))
-          return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
+                  m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X))))) {
+          std::optional<bool> HasSameUnderlyingObjectCache;
+          auto HasSameUnderlyingObject = [&] {
+            if (!HasSameUnderlyingObjectCache)
+              HasSameUnderlyingObjectCache =
+                  getUnderlyingObject(X) == getUnderlyingObject(Y);
+            return *HasSameUnderlyingObjectCache;
+          };
+          std::optional<Value *> CastedValueCache;
+          auto GetCastedValue = [&] {
+            if (!CastedValueCache)
+              CastedValueCache =
+                  Builder.CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
+            return *CastedValueCache;
+          };
+
+          bool Changed = false;
+          for (User *U : GEP.users())
+            if (isa<ICmpInst>(U) || isa<PtrToIntInst>(U) ||
+                HasSameUnderlyingObject()) {
+              U->replaceUsesOfWith(&GEP, GetCastedValue());
+              Changed = true;
+            }
+          return Changed ? &GEP : nullptr;
+        }
       } else {
         // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
         Value *V;
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 22c918e0d4a3ea..373b7f5f2fc0a5 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1590,13 +1590,13 @@ if.else:
 
 define i64 @test_used_by_both_invalid(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test_used_by_both_invalid(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B]] to i64
 ; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
-; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B:%.*]] to i64
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP]], [[C:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
 ; CHECK-NEXT:    [[VAL:%.*]] = load i64, ptr [[GEP]], align 8
 ; CHECK-NEXT:    ret i64 [[VAL]]
 ; CHECK:       if.else:

>From 32263bdf1e57261e1c8789d787f63fe35d533b05 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 31 Dec 2023 20:38:42 +0800
Subject: [PATCH 4/5] fixup! [InstCombine] Relax the same-underlying-object
 constraint for the GEP canonicalization

---
 .../InstCombine/InstructionCombining.cpp      | 33 +++++++------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 33585afec5f802..c453a0f1fa50f5 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2477,29 +2477,18 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         Value *X = GEP.getPointerOperand();
         Value *Y;
         if (match(GEP.getOperand(1),
-                  m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X))))) {
-          std::optional<bool> HasSameUnderlyingObjectCache;
-          auto HasSameUnderlyingObject = [&] {
-            if (!HasSameUnderlyingObjectCache)
-              HasSameUnderlyingObjectCache =
-                  getUnderlyingObject(X) == getUnderlyingObject(Y);
-            return *HasSameUnderlyingObjectCache;
-          };
-          std::optional<Value *> CastedValueCache;
-          auto GetCastedValue = [&] {
-            if (!CastedValueCache)
-              CastedValueCache =
-                  Builder.CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
-            return *CastedValueCache;
-          };
-
+                  m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
+            GEPType == Y->getType()) {
+          bool hasSameUnderlyingObject =
+              getUnderlyingObject(X) == getUnderlyingObject(Y);
           bool Changed = false;
-          for (User *U : GEP.users())
-            if (isa<ICmpInst>(U) || isa<PtrToIntInst>(U) ||
-                HasSameUnderlyingObject()) {
-              U->replaceUsesOfWith(&GEP, GetCastedValue());
-              Changed = true;
-            }
+          GEP.replaceUsesWithIf(Y, [&](Use &U) {
+            bool ShouldReplace = hasSameUnderlyingObject ||
+                                 isa<ICmpInst>(U.getUser()) ||
+                                 isa<PtrToIntInst>(U.getUser());
+            Changed |= ShouldReplace;
+            return ShouldReplace;
+          });
           return Changed ? &GEP : nullptr;
         }
       } else {

>From 67c89f168f5b8370eb5d7e461b6c2bf6664e85d9 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw at qq.com>
Date: Sun, 31 Dec 2023 23:37:09 +0800
Subject: [PATCH 5/5] Update
 llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Co-authored-by: Nikita Popov <github at npopov.com>
---
 llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c453a0f1fa50f5..f2909ce1425a86 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2479,7 +2479,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         if (match(GEP.getOperand(1),
                   m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
             GEPType == Y->getType()) {
-          bool hasSameUnderlyingObject =
+          bool HasSameUnderlyingObject =
               getUnderlyingObject(X) == getUnderlyingObject(Y);
           bool Changed = false;
           GEP.replaceUsesWithIf(Y, [&](Use &U) {



More information about the llvm-commits mailing list