[llvm] [InstCombine] Add support for ptrtoaddr in pointer difference folds (PR #164428)
    via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Tue Oct 21 07:11:17 PDT 2025
    
    
  
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Nikita Popov (nikic)
<details>
<summary>Changes</summary>
This adds support for folding `ptrtoaddr(p2) - ptrtoaddr(p)` pointer subtractions. We can treat ptrtoaddr the same as ptrtoint as the transform is truncation safe anyway (and in fact supports explicit truncation as well).
The only interesting case is the subtraction of zext of ptrtoaddr. For this transform it's important that the address bits are not truncated. For ptrtoaddr this is always the case; for ptrtoint it requires an explicit type check. Previously this checked that the ptrtoint result type is the pointer int type. I'm relaxing this to a "result type is >= address size" check, so it works for both ptrtoint and ptrtoaddr. For this purpose a new matcher is introduced, as I expect that other folds are going to need this as well.
---
Full diff: https://github.com/llvm/llvm-project/pull/164428.diff
3 Files Affected:
- (modified) llvm/include/llvm/IR/PatternMatch.h (+30) 
- (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+6-6) 
- (modified) llvm/test/Transforms/InstCombine/ptrtoaddr.ll (+84) 
``````````diff
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 99f70b101c2ed..8b3112b75bc3c 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2111,6 +2111,28 @@ template <typename Op_t> struct PtrToIntSameSize_match {
   }
 };
 
+template <typename Op_t> struct PtrToIntOrAddr_GEAddrSize_match {
+  const DataLayout &DL;
+  Op_t Op;
+
+  PtrToIntOrAddr_GEAddrSize_match(const DataLayout &DL, const Op_t &OpMatch)
+      : DL(DL), Op(OpMatch) {}
+
+  template <typename OpTy> bool match(OpTy *V) const {
+    if (auto *O = dyn_cast<Operator>(V)) {
+      unsigned Opcode = O->getOpcode();
+      // The ptrtoaddr result type always matches the address size.
+      // For ptrtoint, require a result at least as wide as the address size.
+      return (Opcode == Instruction::PtrToAddr ||
+              (Opcode == Instruction::PtrToInt &&
+               O->getType()->getScalarSizeInBits() >=
+                   DL.getAddressSizeInBits(O->getOperand(0)->getType()))) &&
+             Op.match(O->getOperand(0));
+    }
+    return false;
+  }
+};
+
 template <typename Op_t> struct NNegZExt_match {
   Op_t Op;
 
@@ -2196,6 +2218,14 @@ template <typename OpTy> inline auto m_PtrToIntOrAddr(const OpTy &Op) {
   return m_CombineOr(m_PtrToInt(Op), m_PtrToAddr(Op));
 }
 
+/// Matches PtrToInt or PtrToAddr whose result type is at least as wide as
+/// the address size of the pointer operand, so no address bits are truncated.
+template <typename OpTy>
+inline PtrToIntOrAddr_GEAddrSize_match<OpTy>
+m_PtrToIntOrAddr_GEAddrSize(const DataLayout &DL, const OpTy &Op) {
+  return PtrToIntOrAddr_GEAddrSize_match<OpTy>(DL, Op);
+}
+
 /// Matches IntToPtr.
 template <typename OpTy>
 inline CastOperator_match<OpTy, Instruction::IntToPtr>
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 73ec4514f8414..a68096ef0dc8c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2760,21 +2760,21 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
   // Optimize pointer differences into the same array into a size.  Consider:
   //  &A[10] - &A[0]: we should compile this to "10".
   Value *LHSOp, *RHSOp;
-  if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
-      match(Op1, m_PtrToInt(m_Value(RHSOp))))
+  if (match(Op0, m_PtrToIntOrAddr(m_Value(LHSOp))) &&
+      match(Op1, m_PtrToIntOrAddr(m_Value(RHSOp))))
     if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
                                                I.hasNoUnsignedWrap()))
       return replaceInstUsesWith(I, Res);
 
   // trunc(p)-trunc(q) -> trunc(p-q)
-  if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
-      match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+  if (match(Op0, m_Trunc(m_PtrToIntOrAddr(m_Value(LHSOp)))) &&
+      match(Op1, m_Trunc(m_PtrToIntOrAddr(m_Value(RHSOp)))))
     if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
                                                /* IsNUW */ false))
       return replaceInstUsesWith(I, Res);
 
-  if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) &&
-      match(Op1, m_ZExtOrSelf(m_PtrToInt(m_Value(RHSOp))))) {
+  if (match(Op0, m_ZExt(m_PtrToIntOrAddr_GEAddrSize(DL, m_Value(LHSOp)))) &&
+      match(Op1, m_ZExtOrSelf(m_PtrToIntOrAddr(m_Value(RHSOp))))) {
     if (auto *GEP = dyn_cast<GEPOperator>(LHSOp)) {
       if (GEP->getPointerOperand() == RHSOp) {
         if (GEP->hasNoUnsignedWrap() || GEP->hasNoUnsignedSignedWrap()) {
diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
index 410c43c807ed9..af3dd63d81fdb 100644
--- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
@@ -40,3 +40,87 @@ define i128 @ptrtoaddr_sext(ptr %p) {
   %ext = sext i64 %p.addr to i128
   ret i128 %ext
 }
+
+define i64 @sub_ptrtoaddr(ptr %p, i64 %offset) {
+; CHECK-LABEL: define i64 @sub_ptrtoaddr(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    ret i64 [[OFFSET]]
+;
+  %p2 = getelementptr i8, ptr %p, i64 %offset
+  %p.addr = ptrtoaddr ptr %p to i64
+  %p2.addr = ptrtoaddr ptr %p2 to i64
+  %sub = sub i64 %p2.addr, %p.addr
+  ret i64 %sub
+}
+
+define i32 @sub_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offset) {
+; CHECK-LABEL: define i32 @sub_ptrtoaddr_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    ret i32 [[OFFSET]]
+;
+  %p2 = getelementptr i8, ptr addrspace(1) %p, i32 %offset
+  %p.addr = ptrtoaddr ptr addrspace(1) %p to i32
+  %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32
+  %sub = sub i32 %p2.addr, %p.addr
+  ret i32 %sub
+}
+
+define i32 @sub_trunc_ptrtoaddr(ptr %p, i64 %offset) {
+; CHECK-LABEL: define i32 @sub_trunc_ptrtoaddr(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    [[SUB:%.*]] = trunc i64 [[OFFSET]] to i32
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %p2 = getelementptr i8, ptr %p, i64 %offset
+  %p.addr = ptrtoaddr ptr %p to i64
+  %p2.addr = ptrtoaddr ptr %p2 to i64
+  %p.addr.trunc = trunc i64 %p.addr to i32
+  %p2.addr.trunc = trunc i64 %p2.addr to i32
+  %sub = sub i32 %p2.addr.trunc, %p.addr.trunc
+  ret i32 %sub
+}
+
+define i16 @sub_trunc_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offset) {
+; CHECK-LABEL: define i16 @sub_trunc_ptrtoaddr_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    [[SUB:%.*]] = trunc i32 [[OFFSET]] to i16
+; CHECK-NEXT:    ret i16 [[SUB]]
+;
+  %p2 = getelementptr i8, ptr addrspace(1) %p, i32 %offset
+  %p.addr = ptrtoaddr ptr addrspace(1) %p to i32
+  %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32
+  %p.addr.trunc = trunc i32 %p.addr to i16
+  %p2.addr.trunc = trunc i32 %p2.addr to i16
+  %sub = sub i16 %p2.addr.trunc, %p.addr.trunc
+  ret i16 %sub
+}
+
+define i128 @sub_zext_ptrtoaddr(ptr %p, i64 %offset) {
+; CHECK-LABEL: define i128 @sub_zext_ptrtoaddr(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    [[SUB:%.*]] = zext i64 [[OFFSET]] to i128
+; CHECK-NEXT:    ret i128 [[SUB]]
+;
+  %p2 = getelementptr nuw i8, ptr %p, i64 %offset
+  %p.addr = ptrtoaddr ptr %p to i64
+  %p2.addr = ptrtoaddr ptr %p2 to i64
+  %p.addr.ext = zext i64 %p.addr to i128
+  %p2.addr.ext = zext i64 %p2.addr to i128
+  %sub = sub i128 %p2.addr.ext, %p.addr.ext
+  ret i128 %sub
+}
+
+define i64 @sub_zext_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offset) {
+; CHECK-LABEL: define i64 @sub_zext_ptrtoaddr_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    [[SUB:%.*]] = zext i32 [[OFFSET]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %p2 = getelementptr nuw i8, ptr addrspace(1) %p, i32 %offset
+  %p.addr = ptrtoaddr ptr addrspace(1) %p to i32
+  %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32
+  %p.addr.ext = zext i32 %p.addr to i64
+  %p2.addr.ext = zext i32 %p2.addr to i64
+  %sub = sub i64 %p2.addr.ext, %p.addr.ext
+  ret i64 %sub
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/164428
    
    
More information about the llvm-commits
mailing list