[PATCH] D141590: [PassManager] Add some passes to the sequence of extra vector passes
Tiehu Zhang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 12 03:30:12 PST 2023
TiehuZhang added a comment.
The patch may help generate better IR in some cases, e.g.:
void testNestLoop(const int nCells, const double *x, double *b,
                  const double *values, const int max_row_length) {
  int value_index = 0;
  for (int i = 0; i < nCells; i++) {
    double temp_value = 0.0;
    for (int j = value_index; j < value_index + max_row_length; j++) {
      temp_value += values[j] * x[j];
    }
    value_index += max_row_length;
    b[i] = temp_value;
  }
}
Compile options: `-O3 -march=armv8.2-a+sve -S -emit-llvm -mllvm -extra-vectorizer-passes=true`
**Without the patch**
entry:
  %cmp26 = icmp sgt i32 %nCells, 0
  br i1 %cmp26, label %for.cond1.preheader.preheader, label %for.cond.cleanup

for.cond1.preheader.preheader:                    ; preds = %entry
  %0 = sext i32 %max_row_length to i64
  %wide.trip.count = zext i32 %nCells to i64
  %cmp222 = icmp sgt i32 %max_row_length, 0
  br i1 %cmp222, label %for.cond1.preheader.preheader.split.us, label %for.cond1.preheader

for.cond1.preheader.preheader.split.us:           ; preds = %for.cond1.preheader.preheader
  %1 = tail call i64 @llvm.vscale.i64()
  %2 = shl nuw nsw i64 %1, 2
  br label %for.cond1.preheader.us

...

for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
  %indvars.iv32 = phi i64 [ %indvars.iv.next33, %for.cond1.preheader ], [ 0, %for.cond1.preheader.preheader ]
  %arrayidx9 = getelementptr inbounds double, ptr %b, i64 %indvars.iv32
  store double 0.000000e+00, ptr %arrayidx9, align 8, !tbaa !6
  %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next33, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.cond1.preheader, !llvm.loop !15

for.cond.cleanup:                                 ; preds = %for.cond1.preheader, %for.cond.cleanup3.us, %entry
  ret void
**With the patch**
entry:
  %cmp26 = icmp sgt i32 %nCells, 0
  br i1 %cmp26, label %for.cond1.preheader.preheader, label %for.cond.cleanup

for.cond1.preheader.preheader:                    ; preds = %entry
  %0 = sext i32 %max_row_length to i64
  %wide.trip.count = zext i32 %nCells to i64
  %cmp222 = icmp sgt i32 %max_row_length, 0
  br i1 %cmp222, label %for.cond1.preheader.preheader.split.us, label %for.cond1.preheader.preheader.split

for.cond1.preheader.preheader.split.us:           ; preds = %for.cond1.preheader.preheader
  %1 = tail call i64 @llvm.vscale.i64()
  %2 = shl nuw nsw i64 %1, 2
  br label %for.cond1.preheader.us

...

for.cond1.preheader.preheader.split:              ; preds = %for.cond1.preheader.preheader
  %22 = shl nuw nsw i64 %wide.trip.count, 3
  tail call void @llvm.memset.p0.i64(ptr align 8 %b, i8 0, i64 %22, i1 false), !tbaa !6
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup3.us, %for.cond1.preheader.preheader.split, %entry
  ret void
With the patch, the simple `for.cond1.preheader` loop (the fallback version taken when `max_row_length <= 0`) is transformed into a more efficient `memset`, which may bring a performance benefit to the nested loop.
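For reference, a hedged C-level sketch of what that fallback path now computes (the helper name `zeroFillPath` is hypothetical and not part of the patch):

#include <string.h>

/* Sketch only: with max_row_length <= 0 the inner loop never runs, so the
 * outer loop just zero-fills b. The extra passes let this be recognized as
 * a single memset of %wide.trip.count * 8 bytes (see %22 above). */
static void zeroFillPath(const int nCells, double *b) {
  memset(b, 0, (size_t)nCells * sizeof(double));
}

The scalable-vector path (`for.cond1.preheader.preheader.split.us`) is identical in both outputs; only this fallback loop changes.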
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D141590/new/
https://reviews.llvm.org/D141590