[llvm] [VPlan] First step towards VPlan cost modeling. (PR #92555)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 17 07:28:08 PDT 2024
alexey-bataev wrote:
> @alexey-bataev would it be possible to share the unreduced input without an unused load in the loop?
```
; Module targets x86-64 Linux (see the triple below); the single function in
; this module carries attribute group #0, defined at the end.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @FmoInit(%call.i, %unroll_iter143) -> void, fastcc
;   %call.i         - base pointer of the i32 array the loop reads from.
;   %unroll_iter143 - value the induction variable is compared against
;                     (with icmp eq) to leave the loop.
;
; One loop whose induction variable starts at 0 and advances by 8 per
; iteration. Each iteration writes eight consecutive i32 slots of the
; destination: offsets +0, +2 and +4 receive values loaded from %call.i,
; offsets +1, +3, +5, +6 and +7 receive the constant 0.
; NOTE(review): the ".1" ... ".7" name suffixes and "unroll_iter" suggest this
; is the output of an 8x unroll of an original loop - inferred from names only.
define fastcc void @FmoInit(ptr %call.i, i64 %unroll_iter143) #0 {
entry:
; Destination base pointer comes from a volatile load of the null address.
; NOTE(review): presumably an artifact of test-case reduction - confirm.
%call.i10 = load volatile ptr, ptr null, align 8
br label %for.body25.i
for.body25.i: ; preds = %for.body25.i, %entry
; iv = 0 on entry, iv + 8 on the back edge, so iv is always a multiple of 8
; and each "or iv, k" below (k in 1..7) yields the same value as iv + k.
%indvars.iv12.i = phi i64 [ 0, %entry ], [ %indvars.iv.next13.i.7, %for.body25.i ]
; Slot iv+0: dst[iv] = src[iv >> 1]. This is the only GEP carrying nusw.
%div66.i = lshr i64 %indvars.iv12.i, 1
%arrayidx27.i = getelementptr nusw i32, ptr %call.i, i64 %div66.i
%0 = load i32, ptr %arrayidx27.i, align 4
%arrayidx29.i = getelementptr i32, ptr %call.i10, i64 %indvars.iv12.i
store i32 %0, ptr %arrayidx29.i, align 4
; Slot iv+1: dst[iv | 1] = 0.
%indvars.iv.next13.i = or i64 %indvars.iv12.i, 1
%arrayidx29.i.1 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i
store i32 0, ptr %arrayidx29.i.1, align 4
; Slot iv+2: dst[iv | 2] = src[(iv | 2) >> 1].
%indvars.iv.next13.i.1 = or i64 %indvars.iv12.i, 2
%div66.i.2 = lshr i64 %indvars.iv.next13.i.1, 1
%arrayidx27.i.2 = getelementptr i32, ptr %call.i, i64 %div66.i.2
%1 = load i32, ptr %arrayidx27.i.2, align 4
%arrayidx29.i.2 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.1
store i32 %1, ptr %arrayidx29.i.2, align 4
; Slot iv+3: dst[iv | 3] = 0.
%indvars.iv.next13.i.2 = or i64 %indvars.iv12.i, 3
%arrayidx29.i.3 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.2
store i32 0, ptr %arrayidx29.i.3, align 4
; Slot iv+4: dst[iv | 4] = src[iv]. Unlike slots +0 and +2, the source index
; here is the unshifted induction variable, not a value shifted right by 1.
%indvars.iv.next13.i.3 = or i64 %indvars.iv12.i, 4
%arrayidx27.i.4 = getelementptr i32, ptr %call.i, i64 %indvars.iv12.i
%2 = load i32, ptr %arrayidx27.i.4, align 4
%arrayidx29.i.4 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.3
store i32 %2, ptr %arrayidx29.i.4, align 4
; Slot iv+5: dst[iv | 5] = 0.
%indvars.iv.next13.i.4 = or i64 %indvars.iv12.i, 5
%arrayidx29.i.5 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.4
store i32 0, ptr %arrayidx29.i.5, align 4
; Slot iv+6: dst[iv | 6] = 0. No load from %call.i here, unlike the other
; even offsets (+0, +2, +4).
%indvars.iv.next13.i.5 = or i64 %indvars.iv12.i, 6
%arrayidx29.i.6 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.5
store i32 0, ptr %arrayidx29.i.6, align 4
; Slot iv+7: dst[iv | 7] = 0.
%indvars.iv.next13.i.6 = or i64 %indvars.iv12.i, 7
%arrayidx29.i.7 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.6
store i32 0, ptr %arrayidx29.i.7, align 4
; Advance by 8. The exit test compares the current iv (not the incremented
; value) for equality with %unroll_iter143, so the loop leaves right after the
; iteration in which iv == %unroll_iter143.
%indvars.iv.next13.i.7 = add nsw i64 %indvars.iv12.i, 8
%niter144.ncmp.7 = icmp eq i64 %indvars.iv12.i, %unroll_iter143
br i1 %niter144.ncmp.7, label %FmoGenerateMbToSliceGroupMap.exit.loopexit124.unr-lcssa.loopexit, label %for.body25.i
FmoGenerateMbToSliceGroupMap.exit.loopexit124.unr-lcssa.loopexit: ; preds = %for.body25.i
ret void
}
; Attribute group used by @FmoInit: target CPU is cascadelake and
; min-legal-vector-width is "0".
attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
```
Try this one.
https://github.com/llvm/llvm-project/pull/92555
More information about the llvm-commits mailing list