[llvm-bugs] [Bug 47300] New: [Loop Idiom Recognize] Degrade performance when arrays are replaced by memset/memcpy
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Aug 24 16:10:43 PDT 2020
https://bugs.llvm.org/show_bug.cgi?id=47300
Bug ID: 47300
Summary: [Loop Idiom Recognize] Degrade performance when arrays
are replaced by memset/memcpy
Product: libraries
Version: trunk
Hardware: Other
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Loop Optimizer
Assignee: unassignedbugs at nondot.org
Reporter: anhtuyen at ca.ibm.com
CC: llvm-bugs at lists.llvm.org
Created attachment 23888
--> https://bugs.llvm.org/attachment.cgi?id=23888&action=edit
The IR file of work.c when LIRP is not run.
If the trip count is "small", LIRP should not replace legal stores with
memset/memcpy. The following testcase can be used to demonstrate how Loop Idiom
Recognize Pass (LIRP) degrades the performance.
1) Compile work.c to an IR file called work.ll.
2) Call opt with -loop-idiom to produce an IR file containing memset
instruction
opt -basic-aa -loop-idiom -S work.ll -o work.yes.ll
3) Inspect to make sure LIRP did replace store with memset in work.yes.ll, but
not in work.ll
4) Compile test.c, and link it first with the IR from (1) and then with the IR
from (2).
clang++ -c test.c
clang++ test.o work.ll -o no
clang++ test.o work.yes.ll -o yes
5) Run both executables on a quiet machine. On my performance machine, the
times spent are:
With memset: **Time elapsed: 1.4215**
Without memset: **Time elapsed: 1.3611**
test.c
```
#include <stdio.h>
#include <time.h>
int work(int A[], int sizeI, int sizeL);
/* Timing driver: times a single call to work() and prints the elapsed time. */
int main() {
/* Three-element buffer; the same length (3) is passed as work()'s last argument. */
int A[3] = {1, 2, 3};
int res = 1;
/* clock() reports processor time, so the figure printed below is CPU time
 * consumed by the call rather than wall-clock time. */
clock_t begin = clock();
res = work(A, 9999, 3);
clock_t end = clock();
/* Convert clock ticks to seconds. */
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
printf("Time elapsed: %4.4f\n", time_spent);
/* The value returned by work() becomes the process exit status. */
return res;
}
```
work.c
```
/*
 * Reproducer kernel: stores 0 into A[0] .. A[arraySize - 1], repeating that
 * fill sizeI * sizeI times, then returns A[arraySize - 1].
 *
 * A         - buffer to clear; must hold at least arraySize ints.
 * sizeI     - trip count of each of the two outer loops.
 * arraySize - trip count of the innermost loop (number of elements cleared).
 */
int work(int A[], int sizeI, int arraySize) {
/* The i and j loops only repeat the inner fill; neither index is used in the body. */
for (int i = 0; i < sizeI; ++i)
for (int j = 0; j < sizeI; ++j)
/* Innermost zero-fill loop: this is the store loop that the report says
 * -loop-idiom replaces with a memset call. */
for (int k = 0; k < arraySize; ++k)
A[k] = 0;
/* NOTE(review): for arraySize <= 0 this index is negative and the read is out
 * of bounds; the driver in this report passes 3. */
return A[arraySize - 1];
}
```
IR before calling opt with **-loop-idiom**
```
; ModuleID = './work.ll'
source_filename = "work.c"
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; Function Attrs: noinline nounwind
define dso_local signext i32 @_Z4workPiii(i32* %A, i32 signext %sizeI, i32
signext %arraySize) #0 {
entry:
%cmp6 = icmp slt i32 0, %sizeI
br i1 %cmp6, label %for.body.preheader, label %for.end12
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds =
%for.body.preheader, %for.inc10
%i.07 = phi i32 [ %inc11, %for.inc10 ], [ 0, %for.body.preheader ]
%cmp23 = icmp slt i32 0, %sizeI
br i1 %cmp23, label %for.body3.preheader, label %for.inc10
for.body3.preheader: ; preds = %for.body
br label %for.body3
for.body3: ; preds =
%for.body3.preheader, %for.inc7
%j.04 = phi i32 [ %inc8, %for.inc7 ], [ 0, %for.body3.preheader ]
%cmp51 = icmp slt i32 0, %arraySize
br i1 %cmp51, label %for.body6.preheader, label %for.inc7
for.body6.preheader: ; preds = %for.body3
br label %for.body6
for.body6: ; preds =
%for.body6.preheader, %for.body6
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
for.inc7.loopexit: ; preds = %for.body6
br label %for.inc7
for.inc7: ; preds = %for.inc7.loopexit,
%for.body3
%inc8 = add nsw i32 %j.04, 1
%cmp2 = icmp slt i32 %inc8, %sizeI
br i1 %cmp2, label %for.body3, label %for.inc10.loopexit
for.inc10.loopexit: ; preds = %for.inc7
br label %for.inc10
for.inc10: ; preds =
%for.inc10.loopexit, %for.body
%inc11 = add nsw i32 %i.07, 1
%cmp = icmp slt i32 %inc11, %sizeI
br i1 %cmp, label %for.body, label %for.end12.loopexit
for.end12.loopexit: ; preds = %for.inc10
br label %for.end12
for.end12: ; preds =
%for.end12.loopexit, %entry
%sub = sub nsw i32 %arraySize, 1
%idxprom13 = sext i32 %sub to i64
%arrayidx14 = getelementptr inbounds i32, i32* %A, i64 %idxprom13
%0 = load i32, i32* %arrayidx14, align 4
ret i32 %0
}
attributes #0 = { noinline nounwind
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
"frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0"
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false" "no-trapping-math"="true"
"stack-protector-buffer-size"="8" "target-cpu"="pwr9"
"target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-spe"
"unsafe-fp-math"="false" "use-soft-float"="false"
}
```
Extract from IR **with memset**
```
for.body6.preheader: ; preds = %for.body3
call void @llvm.memset.p0i8.i64(i8* align 4 %A1, i8 0, i64 %1, i1 false)
br label %for.body6
for.body6: ; preds = %for.body6,
%for.body6.preheader
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
```
Extract from IR **without memset**
```
for.body6.preheader: ; preds = %for.body3
br label %for.body6
for.body6: ; preds = %for.body6,
%for.body6.preheader
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
```
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200824/de55d43f/attachment.html>
More information about the llvm-bugs
mailing list