[llvm] [InstComb] Try to convert inttoptr (add (ptrtoint %B), %O) to GEP. (PR #153421)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 13 13:28:39 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/153421
>From cbcfa5f84d2b99383604b1417ff28699130b8a13 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 13 Aug 2025 15:09:35 +0100
Subject: [PATCH 1/3] [InstCombine] Add tests for (inttoptr (add (ptrtoint
%Base), %Offset)).
---
.../InstCombine/fold-bin-operand.ll | 158 +++++++++++++++++-
1 file changed, 157 insertions(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
index f28262b2a77e0..38baf237d9223 100644
--- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-p1:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define i1 @f(i1 %x) {
; CHECK-LABEL: @f(
@@ -30,6 +30,162 @@ define i32 @g(i32 %x) {
ret i32 %b
}
+define i1 @inttoptr_add_ptrtoint_used_by_single_icmp(ptr %src, ptr %p2) {
+; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp(
+; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
+; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
+; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %i = ptrtoint ptr %src to i64
+ %a = add i64 %i, 10
+ %p = inttoptr i64 %a to ptr
+ %c = icmp eq ptr %p, %p2
+ ret i1 %c
+}
+
+define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_different_src_address_spaces(ptr addrspace(1) %src, ptr %p2) {
+; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_different_src_address_spaces(
+; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr addrspace(1) [[SRC:%.*]] to i64
+; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
+; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %i = ptrtoint ptr addrspace(1) %src to i64
+ %a = add i64 %i, 10
+ %p = inttoptr i64 %a to ptr
+ %c = icmp eq ptr %p, %p2
+ ret i1 %c
+}
+
+define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_different_dst_address_spaces(ptr %src, ptr addrspace(1) %p2) {
+; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_different_dst_address_spaces(
+; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
+; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
+; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr addrspace(1)
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr addrspace(1) [[P2:%.*]], [[P]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %i = ptrtoint ptr %src to i64
+ %a = add i64 %i, 10
+ %p = inttoptr i64 %a to ptr addrspace(1)
+ %c = icmp eq ptr addrspace(1) %p, %p2
+ ret i1 %c
+}
+
+define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_int_type_does_not_match_ptr_ty(ptr %src, ptr %p2) {
+; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_int_type_does_not_match_ptr_ty(
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
+; CHECK-NEXT: [[I:%.*]] = trunc i64 [[TMP1]] to i8
+; CHECK-NEXT: [[A:%.*]] = add i8 [[I]], 10
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[A]] to i64
+; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %i = ptrtoint ptr %src to i8
+ %a = add i8 %i, 10
+ %p = inttoptr i8 %a to ptr
+ %c = icmp eq ptr %p, %p2
+ ret i1 %c
+}
+
+define i1 @multiple_inttoptr_add_ptrtoint_used_by_single_icmp(ptr %src) {
+; CHECK-LABEL: @multiple_inttoptr_add_ptrtoint_used_by_single_icmp(
+; CHECK-NEXT: ret i1 false
+;
+ %i = ptrtoint ptr %src to i64
+ %a = add i64 %i, 10
+ %p = inttoptr i64 %a to ptr
+ %a.2 = add i64 %i, 11
+ %p.2 = inttoptr i64 %a.2 to ptr
+ %c = icmp eq ptr %p, %p.2
+ ret i1 %c
+}
+
+define i1 @multiple_inttoptr_add_ptrtoint_used_by_single_icmp_non_constant_offset(ptr %src, i64 %off.1, i64 %off.2) {
+; CHECK-LABEL: @multiple_inttoptr_add_ptrtoint_used_by_single_icmp_non_constant_offset(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[OFF_1:%.*]], [[OFF_2:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %i = ptrtoint ptr %src to i64
+ %a = add i64 %i, %off.1
+ %p = inttoptr i64 %a to ptr
+ %a.2 = add i64 %i, %off.2
+ %p.2 = inttoptr i64 %a.2 to ptr
+ %c = icmp eq ptr %p, %p.2
+ ret i1 %c
+}
+
+define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_in_different_bb(i1 %bc, ptr %src, ptr %p2) {
+; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_in_different_bb(
+; CHECK-NEXT: br i1 [[BC:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
+; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
+; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT: ret i1 [[C]]
+; CHECK: else:
+; CHECK-NEXT: ret i1 false
+;
+ %i = ptrtoint ptr %src to i64
+ %a = add i64 %i, 10
+ %p = inttoptr i64 %a to ptr
+ br i1 %bc, label %then, label %else
+
+then:
+ %c = icmp eq ptr %p, %p2
+ ret i1 %c
+
+else:
+ ret i1 false
+}
+
+define i1 @inttoptr_add_ptrtoint_used_by_multiple_icmps(ptr %src, ptr %p2, ptr %p3) {
+; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_multiple_icmps(
+; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
+; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
+; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr
+; CHECK-NEXT: [[C_1:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[P3:%.*]], [[P]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[C_1]], [[C_2]]
+; CHECK-NEXT: ret i1 [[XOR]]
+;
+ %i = ptrtoint ptr %src to i64
+ %a = add i64 %i, 10
+ %p = inttoptr i64 %a to ptr
+ %c.1 = icmp eq ptr %p, %p2
+ %c.2 = icmp eq ptr %p, %p3
+ %xor = xor i1 %c.1, %c.2
+ ret i1 %xor
+}
+
+declare void @foo(ptr)
+
+define i1 @inttoptr_add_ptrtoint_used_by_multiple_icmps_and_other_user(ptr %src, ptr %p2, ptr %p3) {
+; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_multiple_icmps_and_other_user(
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 10
+; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[C_1:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[P3:%.*]], [[P]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[C_1]], [[C_2]]
+; CHECK-NEXT: call void @foo(ptr [[P]])
+; CHECK-NEXT: ret i1 [[XOR]]
+;
+ %i = ptrtoint ptr %src to i64
+ %a = add i64 %i, 10
+ %p = inttoptr i64 %a to ptr
+ %c.1 = icmp eq ptr %p, %p2
+ %c.2 = icmp eq ptr %p, %p3
+ %xor = xor i1 %c.1, %c.2
+ call void @foo(ptr %p)
+ ret i1 %xor
+}
+
define i32 @h(i1 %A, i32 %B) {
; CHECK-LABEL: @h(
; CHECK-NEXT: EntryBlock:
>From eb0d6f0c6919ca5ad3a92773678054259089e5eb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 13 Aug 2025 15:11:44 +0100
Subject: [PATCH 2/3] [InstComb] Try to convert inttoptr (add (ptrtoint %B),
%O) to GEP.
Replace inttoptr (add (ptrtoint %B), %O) with (getelementptr i8, %B, %O)
if all users are ICmp instructions, which in turn means only the address
value is compared. We should be able to do this if the source pointer,
the integer type and the destination pointer type all have the same
bitwidth, and the source and destination pointers are in the same
address space.
A common source of such (inttoptr (add (ptrtoint %B), %O)) patterns is
various iterators in libc++.
In practice this triggers in a number of files in Clang and various open
source projects, including cppcheck, diamond, llama and more.
Alive2 Proof with constant offset: https://alive2.llvm.org/ce/z/K_5N_B
---
.../InstCombine/InstCombineCasts.cpp | 16 +++++++++++++++
.../InstCombine/fold-bin-operand.ll | 20 +++++++------------
2 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a43a6ee1f58b0..2edabcec2aecf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2073,6 +2073,22 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
return new IntToPtrInst(P, CI.getType());
}
+ // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
+ //(getelementptr i8, %Base, %Offset) if all users are ICmps.
+ Value *Base;
+ Value *Offset;
+ if (match(CI.getOperand(0),
+ m_Add(m_PtrToInt(m_Value(Base)), m_Value(Offset))) &&
+ all_of(CI.users(), IsaPred<ICmpInst>)) {
+ Type *BasePtrTy = Base->getType();
+ if (CI.getType()->getPointerAddressSpace() ==
+ BasePtrTy->getPointerAddressSpace() &&
+ DL.getTypeSizeInBits(BasePtrTy) ==
+ DL.getTypeSizeInBits(CI.getSrcTy())) {
+ return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
+ }
+ }
+
if (Instruction *I = commonCastTransforms(CI))
return I;
diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
index 38baf237d9223..f596bc5d226be 100644
--- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -32,10 +32,8 @@ define i32 @g(i32 %x) {
define i1 @inttoptr_add_ptrtoint_used_by_single_icmp(ptr %src, ptr %p2) {
; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp(
-; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
-; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%i = ptrtoint ptr %src to i64
@@ -123,10 +121,8 @@ define i1 @inttoptr_add_ptrtoint_used_by_single_icmp_in_different_bb(i1 %bc, ptr
; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_single_icmp_in_different_bb(
; CHECK-NEXT: br i1 [[BC:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
-; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
-; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
; CHECK-NEXT: ret i1 [[C]]
; CHECK: else:
; CHECK-NEXT: ret i1 false
@@ -146,11 +142,9 @@ else:
define i1 @inttoptr_add_ptrtoint_used_by_multiple_icmps(ptr %src, ptr %p2, ptr %p3) {
; CHECK-LABEL: @inttoptr_add_ptrtoint_used_by_multiple_icmps(
-; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
-; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
-; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr
-; CHECK-NEXT: [[C_1:%.*]] = icmp eq ptr [[P2:%.*]], [[P]]
-; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[P3:%.*]], [[P]]
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 10
+; CHECK-NEXT: [[C_1:%.*]] = icmp eq ptr [[P2]], [[P:%.*]]
+; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[P2]], [[P3:%.*]]
; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[C_1]], [[C_2]]
; CHECK-NEXT: ret i1 [[XOR]]
;
>From 89b8c4a36f845237eac7ffede5b490bcf9c09bea Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 13 Aug 2025 21:27:31 +0100
Subject: [PATCH 3/3] !fixup use m_PtrToIntSameSize
---
llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2edabcec2aecf..184d573f52fe7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2078,15 +2078,11 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
Value *Base;
Value *Offset;
if (match(CI.getOperand(0),
- m_Add(m_PtrToInt(m_Value(Base)), m_Value(Offset))) &&
+ m_Add(m_PtrToIntSameSize(DL, m_Value(Base)), m_Value(Offset))) &&
+ CI.getType()->getPointerAddressSpace() ==
+ Base->getType()->getPointerAddressSpace() &&
all_of(CI.users(), IsaPred<ICmpInst>)) {
- Type *BasePtrTy = Base->getType();
- if (CI.getType()->getPointerAddressSpace() ==
- BasePtrTy->getPointerAddressSpace() &&
- DL.getTypeSizeInBits(BasePtrTy) ==
- DL.getTypeSizeInBits(CI.getSrcTy())) {
- return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
- }
+ return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
}
if (Instruction *I = commonCastTransforms(CI))
More information about the llvm-commits
mailing list