[llvm] [WIP][VPlan] Use BlockFrequencyInfo in getPredBlockCostDivisor (PR #158690)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 23 05:41:50 PDT 2025
================
@@ -1073,27 +1073,49 @@ for.end: ; preds = %for.inc
define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX1-LABEL: define void @foo6(
; AVX1-SAME: ptr readonly captures(none) [[IN:%.*]], ptr captures(none) [[OUT:%.*]], i32 [[SIZE:%.*]], ptr readonly captures(none) [[TRIGGER:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; AVX1-NEXT: [[ENTRY:.*]]:
-; AVX1-NEXT: br label %[[FOR_BODY:.*]]
-; AVX1: [[FOR_BODY]]:
-; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 4095, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ]
+; AVX1-NEXT: [[ENTRY:.*:]]
+; AVX1-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
+; AVX1: [[VECTOR_MEMCHECK]]:
+; AVX1-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[OUT]], i64 32768
+; AVX1-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER]], i64 16384
+; AVX1-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[IN]], i64 32768
+; AVX1-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[OUT]], [[SCEVGEP1]]
+; AVX1-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]]
+; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; AVX1-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[OUT]], [[SCEVGEP2]]
+; AVX1-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[IN]], [[SCEVGEP]]
+; AVX1-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; AVX1: [[VECTOR_PH]]:
+; AVX1-NEXT: br label %[[VECTOR_BODY:.*]]
+; AVX1: [[VECTOR_BODY]]:
+; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; AVX1-NEXT: [[INDVARS_IV:%.*]] = sub i64 4095, [[INDEX]]
; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; AVX1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
-; AVX1-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]]
-; AVX1: [[IF_THEN]]:
-; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[IN]], i64 [[INDVARS_IV]]
-; AVX1-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
-; AVX1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e-01
-; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]]
-; AVX1-NEXT: store double [[ADD]], ptr [[ARRAYIDX5]], align 8
-; AVX1-NEXT: br label %[[FOR_INC]]
-; AVX1: [[FOR_INC]]:
-; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; AVX1-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; AVX1-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
-; AVX1: [[FOR_END]]:
-; AVX1-NEXT: ret void
+; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 0
+; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -3
+; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META18:![0-9]+]]
+; AVX1-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer
----------------
lukel97 wrote:
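BranchProbabilityInfo (BPI) estimates the probability of the `> 0` comparison (the `icmp sgt ..., 0` above) being true as slightly more than 50%, which influences the cost for AVX1 here.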
https://github.com/llvm/llvm-project/pull/158690
More information about the llvm-commits mailing list