[PATCH] D86262: [LoopIdiomRecognizePass] Options to disable part or the entire Loop Idiom Recognize Pass
Anh Tuyen Tran via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 21 13:50:46 PDT 2020
anhtuyen added a comment.
To avoid any issue with NDA, I wrote a simple test as follows. This test program demonstrates that the impact of the Loop Idiom Recognize Pass (LIRP) replacing a store with memset is not always a positive one.
1. Compile **work.c** to an IR file called **work.ll**. We will use this IR file **work.ll** for both **LIRP** and **LIRP --disable-loop-idiom=memset**.
2. Call opt with **-loop-idiom** to produce an IR file **containing memset** instruction
opt -basic-aa -loop-idiom -S work.ll -o work.yes.ll
3. Call opt with **-loop-idiom --disable-loop-idiom=memset** to produce an IR file **without memset** instruction
opt -basic-aa -loop-idiom --disable-loop-idiom=memset -S work.ll -o work.no.ll
4. Inspect the output to make sure LIRP **replaced the store with memset in work.yes.ll, but not in work.no.ll**.
5. Compile **test.c**, and link it with the IR from (2) and then the IR from (3).
clang++ -c test.c
clang++ test.o work.yes.ll -o yes
clang++ test.o work.no.ll -o no
6. Run both the executables on a quiet machine. On my performance machine, times spent are:
With memset: **Time elapsed: 1.4215**
Without memset: **Time elapsed: 1.3611**
test.c
#include <stdio.h>
#include <time.h>
/* Kernel under test; its definition lives in work.c and is linked in separately. */
int work(int A[], int sizeI, int sizeL);

/*
 * Benchmark driver: times a single call to work() using clock() and prints
 * the elapsed processor time in seconds. The value returned by work() is
 * used as the process exit status.
 */
int main() {
  int data[3] = {1, 2, 3};

  const clock_t started = clock();
  const int status = work(data, 9999, 3);
  const clock_t finished = clock();

  /* clock() counts ticks; divide by CLOCKS_PER_SEC to get seconds. */
  const double seconds = (double)(finished - started) / CLOCKS_PER_SEC;
  printf("Time elapsed: %4.4f\n", seconds);
  return status;
}
work.c
/*
 * Benchmark kernel: writes 0 to A[0..arraySize-1] once for every (i, j)
 * pair, i.e. sizeI * sizeI times, then returns A[arraySize - 1].
 * The innermost k loop is a plain zero-store loop.
 * NOTE(review): when arraySize <= 0 the final read is A[arraySize - 1],
 * which lies before the start of A; the test.c driver passes 3.
 */
int work(int A[], int sizeI, int arraySize) {
for (int i = 0; i < sizeI; ++i)
for (int j = 0; j < sizeI; ++j)
for (int k = 0; k < arraySize; ++k)
A[k] = 0; /* the zero store performed on every innermost iteration */
return A[arraySize - 1];
}
IR before calling opt with **-loop-idiom**
; ModuleID = './work.ll'
source_filename = "work.c"
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; Function Attrs: noinline nounwind
; Three nested counted loops (i, j, k); the innermost one, for.body6, stores
; i32 0 to A[k]. After the loops the function loads and returns A[arraySize - 1].
define dso_local signext i32 @_Z4workPiii(i32* %A, i32 signext %sizeI, i32 signext %arraySize) #0 {
entry:
; Guard for the i loop: go straight to for.end12 unless 0 < sizeI.
%cmp6 = icmp slt i32 0, %sizeI
br i1 %cmp6, label %for.body.preheader, label %for.end12
for.body.preheader: ; preds = %entry
br label %for.body
; i loop header; %cmp23 guards entry to the j loop (0 < sizeI).
for.body: ; preds = %for.body.preheader, %for.inc10
%i.07 = phi i32 [ %inc11, %for.inc10 ], [ 0, %for.body.preheader ]
%cmp23 = icmp slt i32 0, %sizeI
br i1 %cmp23, label %for.body3.preheader, label %for.inc10
for.body3.preheader: ; preds = %for.body
br label %for.body3
; j loop header; %cmp51 guards entry to the k loop (0 < arraySize).
for.body3: ; preds = %for.body3.preheader, %for.inc7
%j.04 = phi i32 [ %inc8, %for.inc7 ], [ 0, %for.body3.preheader ]
%cmp51 = icmp slt i32 0, %arraySize
br i1 %cmp51, label %for.body6.preheader, label %for.inc7
for.body6.preheader: ; preds = %for.body3
br label %for.body6
; k loop: single block that is both body and latch.
for.body6: ; preds = %for.body6.preheader, %for.body6
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
; A[k] = 0
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
for.inc7.loopexit: ; preds = %for.body6
br label %for.inc7
; j latch: increment j and repeat while j < sizeI.
for.inc7: ; preds = %for.inc7.loopexit, %for.body3
%inc8 = add nsw i32 %j.04, 1
%cmp2 = icmp slt i32 %inc8, %sizeI
br i1 %cmp2, label %for.body3, label %for.inc10.loopexit
for.inc10.loopexit: ; preds = %for.inc7
br label %for.inc10
; i latch: increment i and repeat while i < sizeI.
for.inc10: ; preds = %for.inc10.loopexit, %for.body
%inc11 = add nsw i32 %i.07, 1
%cmp = icmp slt i32 %inc11, %sizeI
br i1 %cmp, label %for.body, label %for.end12.loopexit
for.end12.loopexit: ; preds = %for.inc10
br label %for.end12
; Function exit: load A[arraySize - 1] and return it.
for.end12: ; preds = %for.end12.loopexit, %entry
%sub = sub nsw i32 %arraySize, 1
%idxprom13 = sext i32 %sub to i64
%arrayidx14 = getelementptr inbounds i32, i32* %A, i64 %idxprom13
%0 = load i32, i32* %arrayidx14, align 4
ret i32 %0
}
attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-spe" "unsafe-fp-math"="false" "use-soft-float"="false"
}
Extract from IR **with memset**
; Preheader of the k loop: a single llvm.memset call writes byte value 0 to
; %1 bytes starting at %A1 (last operand i1 false = not volatile).
; NOTE(review): %A1 and %1 are defined outside this extract.
for.body6.preheader: ; preds = %for.body3
call void @llvm.memset.p0i8.i64(i8* align 4 %A1, i8 0, i64 %1, i1 false)
br label %for.body6
; The k loop is still present in this extract, but its body no longer
; contains the store of 0 to %arrayidx.
for.body6: ; preds = %for.body6, %for.body6.preheader
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
Extract from IR **without memset**
; Preheader of the k loop: only a branch, no memset call is emitted here.
for.body6.preheader: ; preds = %for.body3
br label %for.body6
; k loop with the same instructions as in the input IR: the store of 0 to
; A[k] remains inside the loop body.
for.body6: ; preds = %for.body6, %for.body6.preheader
%k.02 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i32 %k.02, 1
%cmp5 = icmp slt i32 %inc, %arraySize
br i1 %cmp5, label %for.body6, label %for.inc7.loopexit
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D86262/new/
https://reviews.llvm.org/D86262
More information about the llvm-commits
mailing list