[llvm] [LV] Adding/modifying pre-commit tests for changing loop interleaving count computation (PR #74689)
Nilanjana Basu via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 7 22:11:24 PST 2023
================
@@ -53,9 +53,104 @@ for.end:
ret void
}
-; For a loop with unknown trip count but a profile showing an approx TC estimate of 32, when the
-; auto-vectorizer chooses VF 16, it should choose IC 2 since chances are high that the remainder loop
-; won't need to run
+; For this loop with known TC of 39, when the auto-vectorizer chooses VF 16, it should choose
+; IC 2 since there is a small remainder loop that needs to run after the vector loop.
+; CHECK: remark: <unknown>:0:0: vectorized loop (vectorization width: 16, interleaved count: 2)
+define void @loop_with_tc_39(ptr noalias %p, ptr noalias %q) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %tmp0 = getelementptr %pair, ptr %p, i64 %i, i32 0
+ %tmp1 = load i8, ptr %tmp0, align 1
+ %tmp2 = getelementptr %pair, ptr %p, i64 %i, i32 1
+ %tmp3 = load i8, ptr %tmp2, align 1
+ %add = add i8 %tmp1, %tmp3
+ %qi = getelementptr i8, ptr %q, i64 %i
+ store i8 %add, ptr %qi, align 1
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp eq i64 %i.next, 39
+ br i1 %cond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; TODO: For this loop with known TC of 48, when the auto-vectorizer chooses VF 16, it should choose
+; IC 1 since there will be no remainder loop that needs to run after the vector loop.
+; CHECK: remark: <unknown>:0:0: vectorized loop (vectorization width: 16, interleaved count: 2)
+define void @loop_with_tc_48(ptr noalias %p, ptr noalias %q) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %tmp0 = getelementptr %pair, ptr %p, i64 %i, i32 0
+ %tmp1 = load i8, ptr %tmp0, align 1
+ %tmp2 = getelementptr %pair, ptr %p, i64 %i, i32 1
+ %tmp3 = load i8, ptr %tmp2, align 1
+ %add = add i8 %tmp1, %tmp3
+ %qi = getelementptr i8, ptr %q, i64 %i
+ store i8 %add, ptr %qi, align 1
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp eq i64 %i.next, 48
+ br i1 %cond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; TODO: For this loop with known TC of 49, when the auto-vectorizer chooses VF 16, it should choose
+; IC 1 since a remainder loop TC of 1 is more efficient than a remainder loop TC of 17 with IC 2.
+; CHECK: remark: <unknown>:0:0: vectorized loop (vectorization width: 16, interleaved count: 2)
+define void @loop_with_tc_49(ptr noalias %p, ptr noalias %q) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %tmp0 = getelementptr %pair, ptr %p, i64 %i, i32 0
+ %tmp1 = load i8, ptr %tmp0, align 1
+ %tmp2 = getelementptr %pair, ptr %p, i64 %i, i32 1
+ %tmp3 = load i8, ptr %tmp2, align 1
+ %add = add i8 %tmp1, %tmp3
+ %qi = getelementptr i8, ptr %q, i64 %i
+ store i8 %add, ptr %qi, align 1
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp eq i64 %i.next, 49
+ br i1 %cond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; TODO: For this loop with known TC of 55, when the auto-vectorizer chooses VF 16, it should choose
----------------
nilanjana87 wrote:
> Do we have test cases for larger trip counts?
Added them. I also increased the target-specific maximum IC in the test so that the IC computation algorithm can be evaluated more thoroughly.
https://github.com/llvm/llvm-project/pull/74689
More information about the llvm-commits mailing list