[llvm] [VPlan] First step towards VPlan cost modeling. (PR #92555)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 17 13:54:37 PDT 2024
fhahn wrote:
> > @alexey-bataev would it be possible to share the unreduced input without an unused load in the loop?
>
> ```
> target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> define fastcc void @FmoInit(ptr %call.i, i64 %unroll_iter143) #0 {
> entry:
> %call.i10 = load volatile ptr, ptr null, align 8
> br label %for.body25.i
>
> for.body25.i: ; preds = %for.body25.i, %entry
> %indvars.iv12.i = phi i64 [ 0, %entry ], [ %indvars.iv.next13.i.7, %for.body25.i ]
> %div66.i = lshr i64 %indvars.iv12.i, 1
> %arrayidx27.i = getelementptr nusw i32, ptr %call.i, i64 %div66.i
> %0 = load i32, ptr %arrayidx27.i, align 4
> %arrayidx29.i = getelementptr i32, ptr %call.i10, i64 %indvars.iv12.i
> store i32 %0, ptr %arrayidx29.i, align 4
> %indvars.iv.next13.i = or i64 %indvars.iv12.i, 1
> %arrayidx29.i.1 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i
> store i32 0, ptr %arrayidx29.i.1, align 4
> %indvars.iv.next13.i.1 = or i64 %indvars.iv12.i, 2
> %div66.i.2 = lshr i64 %indvars.iv.next13.i.1, 1
> %arrayidx27.i.2 = getelementptr i32, ptr %call.i, i64 %div66.i.2
> %1 = load i32, ptr %arrayidx27.i.2, align 4
> %arrayidx29.i.2 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.1
> store i32 %1, ptr %arrayidx29.i.2, align 4
> %indvars.iv.next13.i.2 = or i64 %indvars.iv12.i, 3
> %arrayidx29.i.3 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.2
> store i32 0, ptr %arrayidx29.i.3, align 4
> %indvars.iv.next13.i.3 = or i64 %indvars.iv12.i, 4
> %arrayidx27.i.4 = getelementptr i32, ptr %call.i, i64 %indvars.iv12.i
> %2 = load i32, ptr %arrayidx27.i.4, align 4
> %arrayidx29.i.4 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.3
> store i32 %2, ptr %arrayidx29.i.4, align 4
> %indvars.iv.next13.i.4 = or i64 %indvars.iv12.i, 5
> %arrayidx29.i.5 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.4
> store i32 0, ptr %arrayidx29.i.5, align 4
> %indvars.iv.next13.i.5 = or i64 %indvars.iv12.i, 6
> %arrayidx29.i.6 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.5
> store i32 0, ptr %arrayidx29.i.6, align 4
> %indvars.iv.next13.i.6 = or i64 %indvars.iv12.i, 7
> %arrayidx29.i.7 = getelementptr i32, ptr %call.i10, i64 %indvars.iv.next13.i.6
> store i32 0, ptr %arrayidx29.i.7, align 4
> %indvars.iv.next13.i.7 = add nsw i64 %indvars.iv12.i, 8
> %niter144.ncmp.7 = icmp eq i64 %indvars.iv12.i, %unroll_iter143
> br i1 %niter144.ncmp.7, label %FmoGenerateMbToSliceGroupMap.exit.loopexit124.unr-lcssa.loopexit, label %for.body25.i
>
> FmoGenerateMbToSliceGroupMap.exit.loopexit124.unr-lcssa.loopexit: ; preds = %for.body25.i
> ret void
> }
>
> attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
> ```
>
> Try this one
Thanks, should be fixed now!
https://github.com/llvm/llvm-project/pull/92555
More information about the llvm-commits mailing list