[llvm] 4b581e8 - [LV] Add tests where rt checks may make vectorization unprofitable.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 27 02:34:08 PDT 2021


Author: Florian Hahn
Date: 2021-09-27T10:32:28+01:00
New Revision: 4b581e87df6bc2e6af9592aee11be1ec417049bb

URL: https://github.com/llvm/llvm-project/commit/4b581e87df6bc2e6af9592aee11be1ec417049bb
DIFF: https://github.com/llvm/llvm-project/commit/4b581e87df6bc2e6af9592aee11be1ec417049bb.diff

LOG: [LV] Add tests where rt checks may make vectorization unprofitable.

Add a few additional tests which require a large number of runtime
checks for D109368.

Added: 
    llvm/test/Transforms/LoopVectorize/X86/pointer-runtime-checks-unprofitable.ll

Modified: 
    llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
index ccb57dee6cbf..afea25a63572 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -mtriple=arm64-apple-iphoneos -S %s | FileCheck %s
+; RUN: opt -loop-vectorize -mtriple=arm64-apple-iphoneos -vectorizer-min-trip-count=8 -S %s | FileCheck %s
 
 ; Tests for loops with large numbers of runtime checks. Check that loops are
 ; vectorized, if the loop trip counts are large and the impact of the runtime
@@ -50,7 +50,7 @@ loop:                                             ; preds = %bb54, %bb37
   %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv
   store i16 %trunc.2, i16* %gep.4, align 2
   %iv.next = add nuw nsw i64 %iv, 1
-  %cmp = icmp ult i64 %iv, 50
+  %cmp = icmp ult i64 %iv, 10
   br i1 %cmp, label %loop, label %exit
 
 exit:
@@ -109,3 +109,55 @@ loop:                                             ; preds = %bb54, %bb37
 exit:
   ret void
 }
+
+define void @test_tc_unknown(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2, i64 %N) {
+; CHECK-LABEL: define void @test_tc_unknown
+; CHECK-NOT: vector.memcheck
+; CHECK-NOT: vector.body
+;
+entry:
+  br label %loop
+
+loop:                                             ; preds = %bb54, %bb37
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv
+  %lv.1 = load i16, i16* %gep.1, align 2
+  %ext.1 = sext i16 %lv.1 to i32
+  %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv
+  %lv.2 = load i16, i16* %gep.2, align 2
+  %ext.2 = sext i16 %lv.2 to i32
+  %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1
+  %lv.3 = load i16, i16* %gep.off.1, align 2
+  %ext.3 = sext i16 %lv.3 to i32
+  %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2
+  %lv.4 = load i16, i16* %gep.off.2, align 2
+  %ext.4 = sext i16 %lv.4 to i32
+  %tmp62 = mul nsw i32 %ext.2, 11
+  %tmp66 = mul nsw i32 %ext.3, -4
+  %tmp70 = add nsw i32 %tmp62, 4
+  %tmp71 = add nsw i32 %tmp70, %tmp66
+  %tmp72 = add nsw i32 %tmp71, %ext.4
+  %tmp73 = lshr i32 %tmp72, 3
+  %tmp74 = add nsw i32 %tmp73, %ext.1
+  %tmp75 = lshr i32 %tmp74, 1
+  %tmp76 = mul nsw i32 %ext.2, 5
+  %tmp77 = shl nsw i32 %ext.3, 2
+  %tmp78 = add nsw i32 %tmp76, 4
+  %tmp79 = add nsw i32 %tmp78, %tmp77
+  %tmp80 = sub nsw i32 %tmp79, %ext.4
+  %tmp81 = lshr i32 %tmp80, 3
+  %tmp82 = sub nsw i32 %tmp81, %ext.1
+  %tmp83 = lshr i32 %tmp82, 1
+  %trunc.1 = trunc i32 %tmp75 to i16
+  %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv
+  store i16 %trunc.1, i16* %gep.3, align 2
+  %trunc.2 = trunc i32 %tmp83 to i16
+  %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv
+  store i16 %trunc.2, i16* %gep.4, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp = icmp ult i64 %iv, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}

diff --git a/llvm/test/Transforms/LoopVectorize/X86/pointer-runtime-checks-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/X86/pointer-runtime-checks-unprofitable.ll
new file mode 100644
index 000000000000..31a70a36763b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pointer-runtime-checks-unprofitable.ll
@@ -0,0 +1,53 @@
+; REQUIRES: asserts
+
+; RUN: opt -runtime-memory-check-threshold=9 -passes='loop-vectorize' -mtriple=x86_64-unknown-linux -S -debug %s 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+target triple = "x86_64-unknown-linux"
+
+declare double @llvm.pow.f64(double, double)
+
+; Test case where the memory runtime checks and vector body is more expensive
+; than running the scalar loop.
+; TODO: should not be vectorized.
+define void @test(double* nocapture %A, double* nocapture %B, double* nocapture %C, double* nocapture %D, double* nocapture %E) {
+; CHECK-LABEL: @test(
+; CHECK: vector.memcheck
+; CHECK: vector.body
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.A = getelementptr inbounds double, double* %A, i64 %iv
+  %l.A = load double, double* %gep.A, align 4
+  store double 0.0, double* %gep.A, align 4
+  %p.1 = call double @llvm.pow.f64(double %l.A, double 2.0)
+
+  %gep.B = getelementptr inbounds double, double* %B, i64 %iv
+  %l.B = load double, double* %gep.B, align 4
+  %p.2 = call double @llvm.pow.f64(double %l.B, double %p.1)
+  store double 0.0, double* %gep.B, align 4
+
+  %gep.C = getelementptr inbounds double, double* %C, i64 %iv
+  %l.C = load double, double* %gep.C, align 4
+  %p.3 = call double @llvm.pow.f64(double %p.1, double %l.C)
+
+  %gep.D = getelementptr inbounds double, double* %D, i64 %iv
+  %l.D = load double, double* %gep.D
+  %p.4 = call double @llvm.pow.f64(double %p.3, double %l.D)
+  %p.5 = call double @llvm.pow.f64(double %p.4, double %p.3)
+  %mul = fmul double 2.0, %p.5
+  %mul.2 = fmul double %mul, 2.0
+  %mul.3 = fmul double %mul, %mul.2
+  %gep.E = getelementptr inbounds double, double* %E, i64 %iv
+  store double %mul.3, double* %gep.E, align 4
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}


        


More information about the llvm-commits mailing list