<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - [Loop Idiom Recognize] Degrade performance when arrays are replaced by memset/memcpy"
href="https://bugs.llvm.org/show_bug.cgi?id=47300">47300</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>[Loop Idiom Recognize] Degrade performance when arrays are replaced by memset/memcpy
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>Other
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Loop Optimizer
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>anhtuyen@ca.ibm.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>Created <span class=""><a href="attachment.cgi?id=23888" name="attach_23888" title="The IR file of work.c when LIRP is not run.">attachment 23888</a> <a href="attachment.cgi?id=23888&action=edit" title="The IR file of work.c when LIRP is not run.">[details]</a></span>
The IR file of work.c when LIRP is not run.
If the trip count is "small", LIRP should not replace legal stores with
memset/memcpy. The following testcase demonstrates how the Loop Idiom
Recognize Pass (LIRP) degrades performance.
1) Compile work.c to an IR file called work.ll.
2) Call opt with -loop-idiom to produce an IR file containing memset
instruction
opt -basic-aa -loop-idiom -S work.ll -o work.yes.ll
3) Inspect both files to make sure LIRP replaced the store with a memset in
work.yes.ll, but not in work.ll
4) Compile test.c, and link it first with the IR from (1) and then with the IR
from (2).
clang++ -c test.c
clang++ test.o work.ll -o no
clang++ test.o work.yes.ll -o yes
5) Run both executables on a quiet machine. On my performance machine, the
times spent are:
With memset: **Time elapsed: 1.4215**
Without memset: **Time elapsed: 1.3611**
test.c
```
#include &lt;stdio.h&gt;
#include &lt;time.h&gt;
int work(int A[], int sizeI, int sizeL);
int main() {
int A[3] = {1, 2, 3};
int res = 1;
clock_t begin = clock();
res = work(A, 9999, 3);
clock_t end = clock();
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
printf("Time elapsed: %4.4f\n", time_spent);
return res;
}
```
work.c
```
int work(int A[], int sizeI, int arraySize) {
for (int i = 0; i < sizeI; ++i)
for (int j = 0; j < sizeI; ++j)
for (int k = 0; k < arraySize; ++k)
A[k] = 0;
return A[arraySize - 1];
}
```
IR before calling opt with **-loop-idiom**
```
; ModuleID = './work.ll'
source_filename = "work.c"
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; Function Attrs: noinline nounwind
define dso_local signext i32 @_Z4workPiii(i32* %A, i32 signext %sizeI, i32
signext %arraySize) #0 {
entry:
%cmp6 = icmp slt i32 0, %sizeI
br i1 %cmp6, label %for.body.preheader, label %for.end12
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body.preheader, %for.inc10
%i.07 = phi i32 [ %inc11, %for.inc10 ], [ 0, %for.body.preheader ]
%cmp23 = icmp slt i32 0, %sizeI
br i1 %cmp23, label %for.body3.preheader, label %for.inc10
for.body3.preheader: ; preds = %for.body
br label %for.body3
for.body3: ; preds = %for.body3.preheader, %for.inc7
%j.04 = phi i32 [ %inc8, %for.inc7 ], [ 0, %for.body3.preheader ]
%cmp51 = icmp slt i32 0, %arraySize
br i1 %cmp51, label %for.body6.preheader, label %for.inc7
for.body6.preheader: ; preds = %for.body3
br label %for.body6
for.body6: ; preds = %for.body6.preheader, %for.body6
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
for.inc7.loopexit: ; preds = %for.body6
br label %for.inc7
for.inc7: ; preds = %for.inc7.loopexit, %for.body3
%inc8 = add nsw i32 %j.04, 1
%cmp2 = icmp slt i32 %inc8, %sizeI
br i1 %cmp2, label %for.body3, label %for.inc10.loopexit
for.inc10.loopexit: ; preds = %for.inc7
br label %for.inc10
for.inc10: ; preds = %for.inc10.loopexit, %for.body
%inc11 = add nsw i32 %i.07, 1
%cmp = icmp slt i32 %inc11, %sizeI
br i1 %cmp, label %for.body, label %for.end12.loopexit
for.end12.loopexit: ; preds = %for.inc10
br label %for.end12
for.end12: ; preds = %for.end12.loopexit, %entry
%sub = sub nsw i32 %arraySize, 1
%idxprom13 = sext i32 %sub to i64
%arrayidx14 = getelementptr inbounds i32, i32* %A, i64 %idxprom13
%0 = load i32, i32* %arrayidx14, align 4
ret i32 %0
}
attributes #0 = { noinline nounwind
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
"frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0"
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false" "no-trapping-math"="true"
"stack-protector-buffer-size"="8" "target-cpu"="pwr9"
"target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-spe"
"unsafe-fp-math"="false" "use-soft-float"="false"
}
```
Extract from IR **with memset**
```
for.body6.preheader: ; preds = %for.body3
call void @llvm.memset.p0i8.i64(i8* align 4 %A1, i8 0, i64 %1, i1 false)
br label %for.body6
for.body6: ; preds = %for.body6, %for.body6.preheader
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
```
Extract from IR **without memset**
```
for.body6.preheader: ; preds = %for.body3
br label %for.body6
for.body6: ; preds = %for.body6, %for.body6.preheader
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
```</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>