[all-commits] [llvm/llvm-project] 302b80: [InstCombine] Avoid Bitcast-GEP fusion for pointer...

hjooybar2 via All-commits all-commits at lists.llvm.org
Tue Mar 16 14:04:58 PDT 2021


  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: 302b80abf036a10eb6a6c28199063a44570f1f3a
      https://github.com/llvm/llvm-project/commit/302b80abf036a10eb6a6c28199063a44570f1f3a
  Author: Mohammad Hadi Jooybar <mohammad.hadi.jooybar at huawei.com>
  Date:   2021-03-16 (Tue, 16 Mar 2021)

  Changed paths:
    M llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    M llvm/test/Transforms/InstCombine/getelementptr.ll

  Log Message:
  -----------
  [InstCombine] Avoid Bitcast-GEP fusion for pointers directly from allocation functions

Elimination of bitcasts with void pointer arguments results in GEPs with pure byte indexes. These GEPs do not preserve struct/array information and interrupt phi address translation in later pipeline stages.

Here is the original motivation for this patch:

```
#include<stdio.h>
#include<malloc.h>

typedef struct __Node{

  double f;
  struct __Node *next;

} Node;

void foo () {
  Node *a = (Node*) malloc (sizeof(Node));
  a->next = NULL;
  a->f = 11.5f;

  Node *ptr = a;
  double sum = 0.0f;
  while (ptr) {
    sum += ptr->f;
    ptr = ptr->next;
  }
  printf("%f\n", sum);
}
```
By explicit assignment  `a->next = NULL`, we can infer the length of the link list is `1`. In this case we can eliminate while loop traversal entirely. This elimination is supposed to be performed by GVN/MemoryDependencyAnalysis/PhiTranslation .

The final IR before this patch:

```
define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
entry:
  %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
  %next = getelementptr inbounds i8, i8* %call, i64 8
  %0 = bitcast i8* %next to %struct.__Node**
  store %struct.__Node* null, %struct.__Node** %0, align 8, !tbaa !2
  %f = bitcast i8* %call to double*
  store double 1.150000e+01, double* %f, align 8, !tbaa !8
  %tobool12 = icmp eq i8* %call, null
  br i1 %tobool12, label %while.end, label %while.body.lr.ph

while.body.lr.ph:                                 ; preds = %entry
  %1 = bitcast i8* %call to %struct.__Node*
  br label %while.body

while.body:                                       ; preds = %while.body.lr.ph, %while.body
  %sum.014 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ]
  %ptr.013 = phi %struct.__Node* [ %1, %while.body.lr.ph ], [ %3, %while.body ]
  %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
  %2 = load double, double* %f1, align 8, !tbaa !8
  %add = fadd contract double %sum.014, %2
  %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
  %3 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
  %tobool = icmp eq %struct.__Node* %3, null
  br i1 %tobool, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %while.body ]
  %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
  ret void
}
```

Final IR after this patch:
```
; Function Attrs: nofree nounwind
define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
while.end:
  %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 1.150000e+01)
  ret void
}
```

IR before GVN before this patch:
```
define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
entry:
  %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
  %next = getelementptr inbounds i8, i8* %call, i64 8
  %0 = bitcast i8* %next to %struct.__Node**
  store %struct.__Node* null, %struct.__Node** %0, align 8, !tbaa !2
  %f = bitcast i8* %call to double*
  store double 1.150000e+01, double* %f, align 8, !tbaa !8
  %tobool12 = icmp eq i8* %call, null
  br i1 %tobool12, label %while.end, label %while.body.lr.ph

while.body.lr.ph:                                 ; preds = %entry
  %1 = bitcast i8* %call to %struct.__Node*
  br label %while.body

while.body:                                       ; preds = %while.body.lr.ph, %while.body
  %sum.014 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ]
  %ptr.013 = phi %struct.__Node* [ %1, %while.body.lr.ph ], [ %3, %while.body ]
  %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
  %2 = load double, double* %f1, align 8, !tbaa !8
  %add = fadd contract double %sum.014, %2
  %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
  %3 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
  %tobool = icmp eq %struct.__Node* %3, null
  br i1 %tobool, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  %add.lcssa = phi double [ %add, %while.body ]
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
  %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
  ret void
}
```
IR before GVN after this patch:
```
define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
entry:
  %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
  %0 = bitcast i8* %call to %struct.__Node*
  %next = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 1
  store %struct.__Node* null, %struct.__Node** %next, align 8, !tbaa !2
  %f = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 0
  store double 1.150000e+01, double* %f, align 8, !tbaa !8
  %tobool12 = icmp eq i8* %call, null
  br i1 %tobool12, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  %sum.014 = phi double [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
  %ptr.013 = phi %struct.__Node* [ %2, %while.body ], [ %0, %while.body.preheader ]
  %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
  %1 = load double, double* %f1, align 8, !tbaa !8
  %add = fadd contract double %sum.014, %1
  %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
  %2 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
  %tobool = icmp eq %struct.__Node* %2, null
  br i1 %tobool, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  %add.lcssa = phi double [ %add, %while.body ]
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
  %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
  ret void
}
```

The phi translation fails before this patch and it prevents GVN to remove the loop. The reason for this failure is in InstCombine. When the Instruction combining pass decides to convert:
```
 %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16)
  %0 = bitcast i8* %call to %struct.__Node*
  %next = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 1
  store %struct.__Node* null, %struct.__Node** %next
```
to
```
%call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16)
  %next = getelementptr inbounds i8, i8* %call, i64 8
  %0 = bitcast i8* %next to %struct.__Node**
  store %struct.__Node* null, %struct.__Node** %0

```

GEP instructions with pure byte indexes (e.g. `getelementptr inbounds i8, i8* %call, i64 8`) are obstacles for address translation. address translation is looking for structural similarity between GEPs and these GEPs usually do not match since they have different structure.

This change will cause couple of failures in LLVM-tests. However, in all cases we need to change expected result by the test. I will update those tests as soon as I get green light on this patch.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D96881




More information about the All-commits mailing list