[llvm] [InstComb] Fold inttoptr (add (ptrtoint %B), %O) -> GEP for ICMP users. (PR #153421)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 21 07:45:25 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/153421

>From bf9d00b2852233952dd3a9597b9b1d51b569497c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 13 Aug 2025 15:11:44 +0100
Subject: [PATCH 1/3] [InstComb] Fold inttoptr (add (ptrtoint %B), %O) -> GEP
 for ICMP users.

Replace inttoptr (add (ptrtoint %B), %O) with (getelementptr i8, %B, %O)
if all users are ICmp instructions, which in turn means only the address
value is compared. We should be able to do this if the source pointer
and the integer type have the same bitwidth, and the source and
destination pointer types are in the same address space.

Such (inttoptr (add (ptrtoint %B), %O)) sequences commonly originate
from various iterations in libc++.

In practice, this triggers in a number of files in Clang and various
open-source projects, including cppcheck, diamond, llama and more.

Alive2 Proof with constant offset: https://alive2.llvm.org/ce/z/K_5N_B
---
 .../InstCombine/InstCombineCasts.cpp          | 16 +++++++++++++++
 .../InstCombine/fold-bin-operand.ll           | 20 +++++++------------
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 801ac00fa8fa8..b2dd8533f0326 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2072,6 +2072,22 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
     return new IntToPtrInst(P, CI.getType());
   }
 
+  // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
+  // (getelementptr i8, %Base, %Offset) if all users are ICmps.
+  Value *Base;
+  Value *Offset;
+  if (match(CI.getOperand(0),
+            m_Add(m_PtrToInt(m_Value(Base)), m_Value(Offset))) &&
+      all_of(CI.users(), IsaPred<ICmpInst>)) {
+    Type *BasePtrTy = Base->getType();
+    if (CI.getType()->getPointerAddressSpace() ==
+            BasePtrTy->getPointerAddressSpace() &&
+        DL.getTypeSizeInBits(BasePtrTy) ==
+            DL.getTypeSizeInBits(CI.getSrcTy())) {
+      return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
+    }
+  }
+
   if (Instruction *I = commonCastTransforms(CI))
     return I;
 
diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
index 45d09f41fb4c5..51ea80ef847b2 100644
--- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -32,10 +32,8 @@ define i32 @g(i32 %x) {
 
 define i1 @inttoptr_add_ptrtoint_used_by_single_icmp(ptr %src, ptr %p2) {
 ; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp(
-; CHECK-NEXT:    [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[I]], 10
-; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10
+; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %i = ptrtoint ptr %src to i64
@@ -181,10 +179,8 @@ define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_in_different_bb(i1 %bc, ptr
 ; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_in_different_bb(
 ; CHECK-NEXT:    br i1 [[BC:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[I]], 10
-; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10
+; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ; CHECK:       else:
 ; CHECK-NEXT:    ret i1 false
@@ -204,11 +200,9 @@ else:
 
 define i1 @inttoptr_add_ptrtoint_used_by_multiple_icmps(ptr %src, ptr %p2, ptr %p3) {
 ; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_multiple_icmps(
-; CHECK-NEXT:    [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[I]], 10
-; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT:    [[C_1:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
-; CHECK-NEXT:    [[C_2:%.*]] = icmp eq ptr [[P3:%.*]], [[P]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10
+; CHECK-NEXT:    [[C_1:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
+; CHECK-NEXT:    [[C_2:%.*]] = icmp eq ptr [[P2]], [[P3:%.*]]
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[C_1]], [[C_2]]
 ; CHECK-NEXT:    ret i1 [[XOR]]
 ;

>From 3ef7fdd59ec575117d24f975afbbe20feb24f61d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 13 Aug 2025 21:27:31 +0100
Subject: [PATCH 2/3] !fixup use m_PtrToIntSameSize

---
 llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index b2dd8533f0326..3b5375492c4a7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2077,15 +2077,11 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
   Value *Base;
   Value *Offset;
   if (match(CI.getOperand(0),
-            m_Add(m_PtrToInt(m_Value(Base)), m_Value(Offset))) &&
+            m_Add(m_PtrToIntSameSize(DL, m_Value(Base)), m_Value(Offset))) &&
+      CI.getType()->getPointerAddressSpace() ==
+          Base->getType()->getPointerAddressSpace() &&
       all_of(CI.users(), IsaPred<ICmpInst>)) {
-    Type *BasePtrTy = Base->getType();
-    if (CI.getType()->getPointerAddressSpace() ==
-            BasePtrTy->getPointerAddressSpace() &&
-        DL.getTypeSizeInBits(BasePtrTy) ==
-            DL.getTypeSizeInBits(CI.getSrcTy())) {
-      return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
-    }
+    return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
   }
 
   if (Instruction *I = commonCastTransforms(CI))

>From c283423331bc956c04d19927b8994cdcc7d03f62 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 21 Aug 2025 15:38:16 +0100
Subject: [PATCH 3/3] !fixup use m_c_Add

---
 .../InstCombine/InstCombineCasts.cpp           |  2 +-
 .../Transforms/InstCombine/fold-bin-operand.ll | 18 ++++++------------
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3b5375492c4a7..7d949a1679850 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2077,7 +2077,7 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
   Value *Base;
   Value *Offset;
   if (match(CI.getOperand(0),
-            m_Add(m_PtrToIntSameSize(DL, m_Value(Base)), m_Value(Offset))) &&
+            m_c_Add(m_PtrToIntSameSize(DL, m_Value(Base)), m_Value(Offset))) &&
       CI.getType()->getPointerAddressSpace() ==
           Base->getType()->getPointerAddressSpace() &&
       all_of(CI.users(), IsaPred<ICmpInst>)) {
diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
index 51ea80ef847b2..68e0703165bfd 100644
--- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -45,10 +45,8 @@ define i1 @inttoptr_add_ptrtoint_used_by_single_icmp(ptr %src, ptr %p2) {
 
 define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_operands_swapped(ptr %src, ptr %p2) {
 ; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_operands_swapped(
-; CHECK-NEXT:    [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[I]], 10
-; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10
+; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %i = ptrtoint ptr %src to i64
@@ -60,10 +58,8 @@ define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_operands_swapped(ptr %src,
 
 define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset(ptr %src, i64 %off, ptr %p2) {
 ; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset(
-; CHECK-NEXT:    [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[OFF:%.*]], [[I]]
-; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFF:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %i = ptrtoint ptr %src to i64
@@ -75,10 +71,8 @@ define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset(ptr %src, i
 
 define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset_operands_swapped(ptr %src, i64 %off, ptr %p2) {
 ; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_constant_offset_operands_swapped(
-; CHECK-NEXT:    [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[OFF:%.*]], [[I]]
-; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFF:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %i = ptrtoint ptr %src to i64



More information about the llvm-commits mailing list