[all-commits] [llvm/llvm-project] 302b80: [InstCombine] Avoid Bitcast-GEP fusion for pointer...
hjooybar2 via All-commits
all-commits at lists.llvm.org
Tue Mar 16 14:04:58 PDT 2021
Branch: refs/heads/main
Home: https://github.com/llvm/llvm-project
Commit: 302b80abf036a10eb6a6c28199063a44570f1f3a
https://github.com/llvm/llvm-project/commit/302b80abf036a10eb6a6c28199063a44570f1f3a
Author: Mohammad Hadi Jooybar <mohammad.hadi.jooybar at huawei.com>
Date: 2021-03-16 (Tue, 16 Mar 2021)
Changed paths:
M llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
M llvm/test/Transforms/InstCombine/getelementptr.ll
Log Message:
-----------
[InstCombine] Avoid Bitcast-GEP fusion for pointers directly from allocation functions
Elimination of bitcasts with void pointer arguments results in GEPs with pure byte indexes. These GEPs do not preserve struct/array information and interrupt phi address translation in later pipeline stages.
Here is the original motivation for this patch:
```
#include<stdio.h>
#include<malloc.h>
typedef struct __Node{
double f;
struct __Node *next;
} Node;
void foo () {
Node *a = (Node*) malloc (sizeof(Node));
a->next = NULL;
a->f = 11.5f;
Node *ptr = a;
double sum = 0.0f;
while (ptr) {
sum += ptr->f;
ptr = ptr->next;
}
printf("%f\n", sum);
}
```
By the explicit assignment `a->next = NULL`, we can infer that the length of the linked list is `1`. In this case we can eliminate the while-loop traversal entirely. This elimination is supposed to be performed by GVN/MemoryDependencyAnalysis/PhiTranslation.
The final IR before this patch:
```
define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
entry:
%call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
%next = getelementptr inbounds i8, i8* %call, i64 8
%0 = bitcast i8* %next to %struct.__Node**
store %struct.__Node* null, %struct.__Node** %0, align 8, !tbaa !2
%f = bitcast i8* %call to double*
store double 1.150000e+01, double* %f, align 8, !tbaa !8
%tobool12 = icmp eq i8* %call, null
br i1 %tobool12, label %while.end, label %while.body.lr.ph
while.body.lr.ph: ; preds = %entry
%1 = bitcast i8* %call to %struct.__Node*
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
%sum.014 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ]
%ptr.013 = phi %struct.__Node* [ %1, %while.body.lr.ph ], [ %3, %while.body ]
%f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
%2 = load double, double* %f1, align 8, !tbaa !8
%add = fadd contract double %sum.014, %2
%next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
%3 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
%tobool = icmp eq %struct.__Node* %3, null
br i1 %tobool, label %while.end, label %while.body
while.end: ; preds = %while.body, %entry
%sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %while.body ]
%call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
ret void
}
```
Final IR after this patch:
```
; Function Attrs: nofree nounwind
define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
while.end:
%call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 1.150000e+01)
ret void
}
```
IR before GVN before this patch:
```
define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
entry:
%call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
%next = getelementptr inbounds i8, i8* %call, i64 8
%0 = bitcast i8* %next to %struct.__Node**
store %struct.__Node* null, %struct.__Node** %0, align 8, !tbaa !2
%f = bitcast i8* %call to double*
store double 1.150000e+01, double* %f, align 8, !tbaa !8
%tobool12 = icmp eq i8* %call, null
br i1 %tobool12, label %while.end, label %while.body.lr.ph
while.body.lr.ph: ; preds = %entry
%1 = bitcast i8* %call to %struct.__Node*
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
%sum.014 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ]
%ptr.013 = phi %struct.__Node* [ %1, %while.body.lr.ph ], [ %3, %while.body ]
%f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
%2 = load double, double* %f1, align 8, !tbaa !8
%add = fadd contract double %sum.014, %2
%next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
%3 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
%tobool = icmp eq %struct.__Node* %3, null
br i1 %tobool, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
%add.lcssa = phi double [ %add, %while.body ]
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
%sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
%call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
ret void
}
```
IR before GVN after this patch:
```
define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 {
entry:
%call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2
%0 = bitcast i8* %call to %struct.__Node*
%next = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 1
store %struct.__Node* null, %struct.__Node** %next, align 8, !tbaa !2
%f = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 0
store double 1.150000e+01, double* %f, align 8, !tbaa !8
%tobool12 = icmp eq i8* %call, null
br i1 %tobool12, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%sum.014 = phi double [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
%ptr.013 = phi %struct.__Node* [ %2, %while.body ], [ %0, %while.body.preheader ]
%f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0
%1 = load double, double* %f1, align 8, !tbaa !8
%add = fadd contract double %sum.014, %1
%next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1
%2 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2
%tobool = icmp eq %struct.__Node* %2, null
br i1 %tobool, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
%add.lcssa = phi double [ %add, %while.body ]
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
%sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
%call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa)
ret void
}
```
The phi translation fails before this patch, which prevents GVN from removing the loop. The reason for this failure is in InstCombine: the instruction combining pass decides to convert:
```
%call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16)
%0 = bitcast i8* %call to %struct.__Node*
%next = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 1
store %struct.__Node* null, %struct.__Node** %next
```
to
```
%call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16)
%next = getelementptr inbounds i8, i8* %call, i64 8
%0 = bitcast i8* %next to %struct.__Node**
store %struct.__Node* null, %struct.__Node** %0
```
GEP instructions with pure byte indexes (e.g. `getelementptr inbounds i8, i8* %call, i64 8`) are obstacles for address translation. Address translation looks for structural similarity between GEPs, and these GEPs usually do not match since they have a different structure.
This change will cause a couple of failures in the LLVM tests. However, in all cases we need to change the result expected by the test. I will update those tests as soon as I get the green light on this patch.
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D96881
More information about the All-commits
mailing list