[llvm] 949ec83 - [InstCombine] Relax the same-underlying-object constraint for the GEP canonicalization (#76583)

via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 31 08:35:48 PST 2023


Author: Yingwei Zheng
Date: 2024-01-01T00:35:42+08:00
New Revision: 949ec83eaf6fa6dbffb94c2ea9c0a4d5efdbd239

URL: https://github.com/llvm/llvm-project/commit/949ec83eaf6fa6dbffb94c2ea9c0a4d5efdbd239
DIFF: https://github.com/llvm/llvm-project/commit/949ec83eaf6fa6dbffb94c2ea9c0a4d5efdbd239.diff

LOG: [InstCombine] Relax the same-underlying-object constraint for the GEP canonicalization (#76583)

https://github.com/llvm/llvm-project/commit/7d7001b2cbd05bb1955c18e7f8668644bd1258dc
canonicalizes `(gep i8, X, (ptrtoint Y) - (ptrtoint X))` into `bitcast
Y` iff `X` and `Y` have the same underlying object.

I find that the result of this pattern is usually used as an operand of
an icmp in some real-world applications. I think we can do the
canonicalization if the result is only used by icmps/ptrtoints.

Alive2: https://alive2.llvm.org/ce/z/j4-HJZ

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/getelementptr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index df393d72a85bf2..351fc3b0174fc7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2471,14 +2471,26 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
       if (TyAllocSize == 1) {
         // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
-        // but only if both point to the same underlying object (otherwise
-        // provenance is not necessarily retained).
+        // but only if the result pointer is only used as if it were an integer,
+        // or both point to the same underlying object (otherwise provenance is
+        // not necessarily retained).
         Value *X = GEP.getPointerOperand();
         Value *Y;
         if (match(GEP.getOperand(1),
                   m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
-            getUnderlyingObject(X) == getUnderlyingObject(Y))
-          return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
+            GEPType == Y->getType()) {
+          bool HasSameUnderlyingObject =
+              getUnderlyingObject(X) == getUnderlyingObject(Y);
+          bool Changed = false;
+          GEP.replaceUsesWithIf(Y, [&](Use &U) {
+            bool ShouldReplace = HasSameUnderlyingObject ||
+                                 isa<ICmpInst>(U.getUser()) ||
+                                 isa<PtrToIntInst>(U.getUser());
+            Changed |= ShouldReplace;
+            return ShouldReplace;
+          });
+          return Changed ? &GEP : nullptr;
+        }
       } else {
         // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
         Value *V;

diff  --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 7d67f2583aa24d..373b7f5f2fc0a5 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1537,4 +1537,82 @@ define ptr @gep_ashr_without_exact(ptr %p, i64 %off) {
   ret ptr %ptr
 }
 
+define i1 @test_only_used_by_icmp(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test_only_used_by_icmp(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %pa = ptrtoint ptr %a to i64
+  %pb = ptrtoint ptr %b to i64
+  %sub = sub i64 %pb, %pa
+  %gep = getelementptr i8, ptr %a, i64 %sub
+  %cmp = icmp eq ptr %gep, %c
+  ret i1 %cmp
+}
+
+define i64 @test_only_used_by_ptrtoint(ptr %a, ptr %b) {
+; CHECK-LABEL: @test_only_used_by_ptrtoint(
+; CHECK-NEXT:    [[VAL:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT:    ret i64 [[VAL]]
+;
+  %pa = ptrtoint ptr %a to i64
+  %pb = ptrtoint ptr %b to i64
+  %sub = sub i64 %pb, %pa
+  %gep = getelementptr i8, ptr %a, i64 %sub
+  %val = ptrtoint ptr %gep to i64
+  ret i64 %val
+}
+
+define i64 @test_used_by_both(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test_used_by_both(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[VAL:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    ret i64 [[VAL]]
+; CHECK:       if.else:
+; CHECK-NEXT:    ret i64 0
+;
+  %pa = ptrtoint ptr %a to i64
+  %pb = ptrtoint ptr %b to i64
+  %sub = sub i64 %pb, %pa
+  %gep = getelementptr i8, ptr %a, i64 %sub
+  %cmp = icmp eq ptr %gep, %c
+  br i1 %cmp, label %if.then, label %if.else
+if.then:
+  %val = ptrtoint ptr %gep to i64
+  ret i64 %val
+if.else:
+  ret i64 0
+}
+
+; Negative tests
+
+define i64 @test_used_by_both_invalid(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test_used_by_both_invalid(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[PB:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[PA:%.*]] = ptrtoint ptr [[A:%.*]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[PB]], [[PA]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUB]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i64, ptr [[GEP]], align 8
+; CHECK-NEXT:    ret i64 [[VAL]]
+; CHECK:       if.else:
+; CHECK-NEXT:    ret i64 0
+;
+  %pa = ptrtoint ptr %a to i64
+  %pb = ptrtoint ptr %b to i64
+  %sub = sub i64 %pb, %pa
+  %gep = getelementptr i8, ptr %a, i64 %sub
+  %cmp = icmp eq ptr %gep, %c
+  br i1 %cmp, label %if.then, label %if.else
+if.then:
+  %val = load i64, ptr %gep, align 8
+  ret i64 %val
+if.else:
+  ret i64 0
+}
+
 !0 = !{!"branch_weights", i32 2, i32 10}


        


More information about the llvm-commits mailing list