[PATCH] D96881: Avoid Bitcast-GEP fusion for void pointers

mohammad hadi jooybar via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 17 10:46:07 PST 2021


hjooybar2 created this revision.
hjooybar2 added reviewers: majnemer, hfinkel, lattner.
Herald added subscribers: kosarev, hiraditya.
hjooybar2 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Elimination of bitcasts with void pointer arguments results in GEPs with pure byte indexes. These GEPs do not preserve struct/array information and interrupt phi address translation in later pipeline stages.

Here is the original motivation for this patch:

  #include<stdio.h>
  #include<malloc.h>
  
  typedef struct __Node{
  
    double f;
    struct __Node *next;
  
  } Node;
  
  
  void foo () {
    Node *a = (Node*) malloc (sizeof(Node));
    a->next = NULL;
    a->f = 11.5f;
  
    Node *ptr = a;
    double sum = 0.0f;
    while (ptr) {
      sum += ptr->f;
      ptr = ptr->next;
    }
    printf("%f\n", sum);
  }

By explicit assignment  `a->next = NULL`, we can infer the length of the link list is `1`. In this case we can eliminate while loop traversal entirely. This elimination is supposed to be performed by GVN/MemoryDependencyAnalysis/PhiTranslation .

The final IR before this patch:

  define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
  entry:
    %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
    %next = getelementptr inbounds i8, i8* %call, i64 8
    %0 = bitcast i8* %next to %struct.__Node**
    store %struct.__Node* null, %struct.__Node** %0, align 8, !tbaa !2
    %f = bitcast i8* %call to double*
    store double 1.150000e+01, double* %f, align 8, !tbaa !8
    %tobool12 = icmp eq i8* %call, null
    br i1 %tobool12, label %while.end, label %while.body.lr.ph
  
  while.body.lr.ph:                                 ; preds = %entry
    %1 = bitcast i8* %call to %struct.__Node*
    br label %while.body
  
  while.body:                                       ; preds = %while.body.lr.ph, %while.body
    %sum.014 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ]
    %ptr.013 = phi %struct.__Node* [ %1, %while.body.lr.ph ], [ %3, %while.body ]
    %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
    %2 = load double, double* %f1, align 8, !tbaa !8
    %add = fadd contract double %sum.014, %2
    %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
    %3 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
    %tobool = icmp eq %struct.__Node* %3, null
    br i1 %tobool, label %while.end, label %while.body
  
  while.end:                                        ; preds = %while.body, %entry
    %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %while.body ]
    %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
    ret void
  }

Final IR after this patch:

  ; Function Attrs: nofree nounwind
  define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
  while.end:
    %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 1.150000e+01)
    ret void
  }

IR before GVN before this patch:

  define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
  entry:
    %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
    %next = getelementptr inbounds i8, i8* %call, i64 8
    %0 = bitcast i8* %next to %struct.__Node**
    store %struct.__Node* null, %struct.__Node** %0, align 8, !tbaa !2
    %f = bitcast i8* %call to double*
    store double 1.150000e+01, double* %f, align 8, !tbaa !8
    %tobool12 = icmp eq i8* %call, null
    br i1 %tobool12, label %while.end, label %while.body.lr.ph
  
  while.body.lr.ph:                                 ; preds = %entry
    %1 = bitcast i8* %call to %struct.__Node*
    br label %while.body
  
  while.body:                                       ; preds = %while.body.lr.ph, %while.body
    %sum.014 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ]
    %ptr.013 = phi %struct.__Node* [ %1, %while.body.lr.ph ], [ %3, %while.body ]
    %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
    %2 = load double, double* %f1, align 8, !tbaa !8
    %add = fadd contract double %sum.014, %2
    %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
    %3 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
    %tobool = icmp eq %struct.__Node* %3, null
    br i1 %tobool, label %while.end.loopexit, label %while.body
  
  while.end.loopexit:                               ; preds = %while.body
    %add.lcssa = phi double [ %add, %while.body ]
    br label %while.end
  
  while.end:                                        ; preds = %while.end.loopexit, %entry
    %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
    %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
    ret void
  }

IR before GVN after this patch:

  define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
  entry:
    %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
    %0 = bitcast i8* %call to %struct.__Node*
    %next = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 1
    store %struct.__Node* null, %struct.__Node** %next, align 8, !tbaa !2
    %f = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 0
    store double 1.150000e+01, double* %f, align 8, !tbaa !8
    %tobool12 = icmp eq i8* %call, null
    br i1 %tobool12, label %while.end, label %while.body.preheader
  
  while.body.preheader:                             ; preds = %entry
    br label %while.body
  
  while.body:                                       ; preds = %while.body.preheader, %while.body
    %sum.014 = phi double [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
    %ptr.013 = phi %struct.__Node* [ %2, %while.body ], [ %0, %while.body.preheader ]
    %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
    %1 = load double, double* %f1, align 8, !tbaa !8
    %add = fadd contract double %sum.014, %1
    %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
    %2 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
    %tobool = icmp eq %struct.__Node* %2, null
    br i1 %tobool, label %while.end.loopexit, label %while.body
  
  while.end.loopexit:                               ; preds = %while.body
    %add.lcssa = phi double [ %add, %while.body ]
    br label %while.end
  
  while.end:                                        ; preds = %while.end.loopexit, %entry
    %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
    %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
    ret void
  }

The phi translation fails before this patch and it prevents GVN to remove the loop. The reason for this failure is in InstCombine. When the Instruction combining pass decides to convert:

  %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16)
   %0 = bitcast i8* %call to %struct.__Node*
   %next = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 1
   store %struct.__Node* null, %struct.__Node** %next

to

  %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) 
    %next = getelementptr inbounds i8, i8* %call, i64 8
    %0 = bitcast i8* %next to %struct.__Node**
    store %struct.__Node* null, %struct.__Node** %0

GEP instructions with pure byte indexes (e.g. `getelementptr inbounds i8, i8* %call, i64 8`) are obstacles for address translation. address translation is looking for structural similarity between GEPs and these GEPs usually do not match since they have different structure.

This change will cause couple of failures in LLVM-tests. However, in all cases we need to change expected result by the test. I will update those tests as soon as I get green light on this patch.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D96881

Files:
  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp


Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2433,24 +2433,23 @@
     // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
     unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType);
     APInt Offset(OffsetBits, 0);
-    if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset)) {
+
+    // Returns true if the Type is void*
+    auto isVoidPointerType = [](Type *Ty) -> bool {
+      return (Ty->isPointerTy() && Ty->getPointerElementType()->isIntegerTy(8));
+    };
+
+    // If the bitcast argument is an allocation (or void* in general), The bitcast
+    // is for convertion to actual type of allocation. Removing such bitcasts,
+    // results in having GEPs with i8* base and pure byte offsets. That means GEP
+    // is not aware of Struct or array hierarchy. 
+    // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have 
+    // a better chance to succeed
+    if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
+        !isVoidPointerType(SrcType)) {
       // If this GEP instruction doesn't move the pointer, just replace the GEP
       // with a bitcast of the real input to the dest type.
       if (!Offset) {
-        // If the bitcast is of an allocation, and the allocation will be
-        // converted to match the type of the cast, don't touch this.
-        if (isa<AllocaInst>(SrcOp) || isAllocationFn(SrcOp, &TLI)) {
-          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
-          if (Instruction *I = visitBitCast(*BCI)) {
-            if (I != BCI) {
-              I->takeName(BCI);
-              BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
-              replaceInstUsesWith(*BCI, I);
-            }
-            return &GEP;
-          }
-        }
-
         if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
           return new AddrSpaceCastInst(SrcOp, GEPType);
         return new BitCastInst(SrcOp, GEPType);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D96881.324319.patch
Type: text/x-patch
Size: 2205 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210217/acd5ba21/attachment.bin>


More information about the llvm-commits mailing list