[llvm] de9e18d - [InstCombine] Handle ptrtoaddr in gep of pointer sub fold (#164818)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 27 01:16:52 PDT 2025


Author: Nikita Popov
Date: 2025-10-27T09:16:48+01:00
New Revision: de9e18dc75c432a59e94cecd0cab42909893123a

URL: https://github.com/llvm/llvm-project/commit/de9e18dc75c432a59e94cecd0cab42909893123a
DIFF: https://github.com/llvm/llvm-project/commit/de9e18dc75c432a59e94cecd0cab42909893123a.diff

LOG: [InstCombine] Handle ptrtoaddr in gep of pointer sub fold (#164818)

This extends the `ptradd x, ptrtoint(y) - ptrtoint(x)` to `y`
InstCombine fold to support ptrtoaddr. In the case where x and y have
the same underlying object, this is handled by InstSimplify already. If
the underlying object may differ, the replacement can only be performed
if provenance does not matter.

For pointers with non-address bits we need to be careful here, because
the pattern will return a pointer with the non-address bits of x and the
address bits of y. As such, uses in ptrtoaddr are safe to replace, but
uses in ptrtoint are not. Whether uses in icmp are safe to replace
depends on the outcome of the pending discussion on icmp semantics (I'll
adjust this in https://github.com/llvm/llvm-project/pull/163936 if/when
that lands).

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/ptrtoaddr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9c8de45a0e626..67f837c7ed968 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3358,21 +3358,21 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
       if (TyAllocSize == 1) {
         // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
-        // but only if the result pointer is only used as if it were an integer,
-        // or both point to the same underlying object (otherwise provenance is
-        // not necessarily retained).
+        // but only if the result pointer is only used as if it were an integer.
+        // (The case where the underlying object is the same is handled by
+        // InstSimplify.)
         Value *X = GEP.getPointerOperand();
         Value *Y;
-        if (match(GEP.getOperand(1),
-                  m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
+        if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)),
+                                           m_PtrToIntOrAddr(m_Specific(X)))) &&
             GEPType == Y->getType()) {
-          bool HasSameUnderlyingObject =
-              getUnderlyingObject(X) == getUnderlyingObject(Y);
+          bool HasNonAddressBits =
+              DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
           bool Changed = false;
           GEP.replaceUsesWithIf(Y, [&](Use &U) {
-            bool ShouldReplace = HasSameUnderlyingObject ||
-                                 isa<ICmpInst>(U.getUser()) ||
-                                 isa<PtrToIntInst>(U.getUser());
+            bool ShouldReplace = isa<PtrToAddrInst>(U.getUser()) ||
+                                 (!HasNonAddressBits &&
+                                  isa<ICmpInst, PtrToIntInst>(U.getUser()));
             Changed |= ShouldReplace;
             return ShouldReplace;
           });

diff  --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
index f19cca8f808c8..a7434a28c4164 100644
--- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
@@ -4,6 +4,10 @@
 ; The ptrtoaddr folds are also valid for pointers that have external state.
 target datalayout = "pe1:64:64:64:32"
 
+declare void @use.i1(i1)
+declare void @use.i32(i32)
+declare void @use.i64(i64)
+
 ; ptrtoaddr result type is fixed, and can't be combined with integer cast.
 define i32 @ptrtoaddr_trunc(ptr %p) {
 ; CHECK-LABEL: define i32 @ptrtoaddr_trunc(
@@ -171,3 +175,65 @@ define i128 @sub_zext_ptrtoint_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offs
   %sub = sub i128 %p2.addr.ext, %p.int.ext
   ret i128 %sub
 }
+
+; The uses in icmp, ptrtoint, ptrtoaddr should be replaced. The one in the
+; return value should not, as the provenance 
diff ers.
+define ptr @gep_sub_ptrtoaddr_
diff erent_obj(ptr %p, ptr %p2, ptr %p3) {
+; CHECK-LABEL: define ptr @gep_sub_ptrtoaddr_
diff erent_obj(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]], ptr [[P3:%.*]]) {
+; CHECK-NEXT:    [[P_ADDR:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT:    [[P2_ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[P2_ADDR]], [[P_ADDR]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[P2]], [[P3]]
+; CHECK-NEXT:    call void @use.i1(i1 [[CMP]])
+; CHECK-NEXT:    [[INT:%.*]] = ptrtoint ptr [[P2]] to i64
+; CHECK-NEXT:    call void @use.i64(i64 [[INT]])
+; CHECK-NEXT:    [[ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT:    call void @use.i64(i64 [[ADDR]])
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %p.addr = ptrtoaddr ptr %p to i64
+  %p2.addr = ptrtoaddr ptr %p2 to i64
+  %sub = sub i64 %p2.addr, %p.addr
+  %gep = getelementptr i8, ptr %p, i64 %sub
+  %cmp = icmp eq ptr %gep, %p3
+  call void @use.i1(i1 %cmp)
+  %int = ptrtoint ptr %gep to i64
+  call void @use.i64(i64 %int)
+  %addr = ptrtoaddr ptr %gep to i64
+  call void @use.i64(i64 %addr)
+  ret ptr %gep
+}
+
+; The use in ptrtoaddr should be replaced. The uses in ptrtoint and icmp should
+; not be replaced, as the non-address bits 
diff er. The use in the return value
+; should not be replaced as the provenace 
diff ers.
+define ptr addrspace(1) @gep_sub_ptrtoaddr_
diff erent_obj_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2, ptr addrspace(1) %p3) {
+; CHECK-LABEL: define ptr addrspace(1) @gep_sub_ptrtoaddr_
diff erent_obj_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]], ptr addrspace(1) [[P3:%.*]]) {
+; CHECK-NEXT:    [[P_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P]] to i32
+; CHECK-NEXT:    [[P2_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[P2_ADDR]], [[P_ADDR]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr addrspace(1) [[GEP]], [[P3]]
+; CHECK-NEXT:    call void @use.i1(i1 [[CMP]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
+; CHECK-NEXT:    [[INT:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    call void @use.i32(i32 [[INT]])
+; CHECK-NEXT:    [[ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32
+; CHECK-NEXT:    call void @use.i32(i32 [[ADDR]])
+; CHECK-NEXT:    ret ptr addrspace(1) [[GEP]]
+;
+  %p.addr = ptrtoaddr ptr addrspace(1) %p to i32
+  %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32
+  %sub = sub i32 %p2.addr, %p.addr
+  %gep = getelementptr i8, ptr addrspace(1) %p, i32 %sub
+  %cmp = icmp eq ptr addrspace(1) %gep, %p3
+  call void @use.i1(i1 %cmp)
+  %int = ptrtoint ptr addrspace(1) %gep to i32
+  call void @use.i32(i32 %int)
+  %addr = ptrtoaddr ptr addrspace(1) %gep to i32
+  call void @use.i32(i32 %addr)
+  ret ptr addrspace(1) %gep
+}


        


More information about the llvm-commits mailing list