[llvm] e561eda - [LV] Prepare tests for D154261.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 3 09:49:46 PDT 2023
Author: Florian Hahn
Date: 2023-07-03T17:49:36+01:00
New Revision: e561edaaa56c9a8818d546774b141dead7224b50
URL: https://github.com/llvm/llvm-project/commit/e561edaaa56c9a8818d546774b141dead7224b50
DIFF: https://github.com/llvm/llvm-project/commit/e561edaaa56c9a8818d546774b141dead7224b50.diff
LOG: [LV] Prepare tests for D154261.
Update trip count of test in
pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll to
make sure epilogue vectorization will still trigger after D154261,
checking for the original issue.
Move the original test to limit-vf-by-tripcount.ll for testing new
functionality of D154261.
Added:
Modified:
llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
index fc0d9ed67ff4cf..b8333ad0c47789 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s
+; RUN: opt -passes=loop-vectorize -mcpu=skylake-avx512 -S %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "x86_64-unknown-linux-gnu"
; TODO: Make sure selected VF for the epilog loop doesn't exceed remaining TC.
-define void @test_tc_17_no_epilogue_vectorization(ptr noalias %src, ptr noalias %dst) #0 {
+define void @test_tc_17_no_epilogue_vectorization(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: @test_tc_17_no_epilogue_vectorization(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
@@ -80,7 +80,7 @@ exit:
ret void
}
-define void @test_tc_18(ptr noalias %src, ptr noalias %dst) #0 {
+define void @test_tc_18(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: @test_tc_18(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
@@ -155,7 +155,7 @@ exit:
ret void
}
-define void @test_tc_19(ptr noalias %src, ptr noalias %dst) #0 {
+define void @test_tc_19(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: @test_tc_19(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
@@ -230,7 +230,7 @@ exit:
ret void
}
-define void @test_tc_20(ptr noalias %src, ptr noalias %dst) #0 {
+define void @test_tc_20(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: @test_tc_20(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -283,4 +283,77 @@ exit:
ret void
}
-attributes #0 = { "target-features"="+avx512f" }
+define void @limit_main_loop_vf_to_avoid_epilogue_vectorization(ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: @limit_main_loop_vf_to_avoid_epilogue_vectorization(
+; CHECK-NEXT: iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC:%.*]], i64 [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <96 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <96 x i8> [[WIDE_VEC]], <96 x i8> poison, <32 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45, i32 48, i32 51, i32 54, i32 57, i32 60, i32 63, i32 66, i32 69, i32 72, i32 75, i32 78, i32 81, i32 84, i32 87, i32 90, i32 93>
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
+; CHECK-NEXT: store <32 x i8> [[STRIDED_VEC]], ptr [[TMP4]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK: vec.epilog.iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK: vec.epilog.ph:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK: vec.epilog.vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC]], i64 [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
+; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <24 x i8>, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <24 x i8> [[WIDE_VEC2]], <24 x i8> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
+; CHECK-NEXT: store <8 x i8> [[STRIDED_VEC3]], ptr [[TMP10]], align 1
+; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 24
+; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: vec.epilog.middle.block:
+; CHECK-NEXT: br label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK: vec.epilog.scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC]], i64 [[IV]], i64 0
+; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 32
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.src = getelementptr inbounds [3 x i8], ptr %src, i64 %iv, i64 0
+ %l = load i8, ptr %gep.src, align 1
+ %gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv
+ store i8 %l, ptr %gep.dst, align 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %cmp = icmp eq i64 %iv.next, 32
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll
index 7340167519fed9..a23e6326d8631d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=loop-vectorize -S -mcpu=skylake-avx512 -S %s | FileCheck %s
+; RUN: opt -passes=loop-vectorize -mcpu=skylake-avx512 -S %s | FileCheck %s
target triple = "x86_64-apple-macos"
@@ -12,7 +12,7 @@ define void @pr56319(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -26,32 +26,32 @@ define void @pr56319(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: store <32 x i8> [[STRIDED_VEC]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 32, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX1]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC]], i64 [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
-; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <24 x i8>, ptr [[TMP8]], align 1
-; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <24 x i8> [[WIDE_VEC2]], <24 x i8> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <12 x i8>, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i8> [[WIDE_VEC2]], <12 x i8> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
-; CHECK-NEXT: store <8 x i8> [[STRIDED_VEC3]], ptr [[TMP10]], align 1
-; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[OFFSET_IDX]], 8
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 24
-; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: store <4 x i8> [[STRIDED_VEC3]], ptr [[TMP10]], align 1
+; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 36
+; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 36, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 32, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -60,7 +60,7 @@ define void @pr56319(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 39
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
@@ -75,7 +75,7 @@ loop:
%gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv
store i8 %l, ptr %gep.dst, align 1
%iv.next = add nuw nsw i64 %iv, 1
- %cmp = icmp eq i64 %iv.next, 32
+ %cmp = icmp eq i64 %iv.next, 39
br i1 %cmp, label %exit, label %loop
exit:
More information about the llvm-commits
mailing list