[llvm] 3ef614a - NFC: update of ARM llvm regr test, follow up of 9633fc14aef7ee3da3b.

Tue Apr 14 13:30:45 PDT 2020

Author: Sjoerd Meijer
Date: 2020-04-14T21:30:22+01:00
New Revision: 3ef614a007a2f00664aa53a38b1b3b4f0259d7bc

URL: https://github.com/llvm/llvm-project/commit/3ef614a007a2f00664aa53a38b1b3b4f0259d7bc
DIFF: https://github.com/llvm/llvm-project/commit/3ef614a007a2f00664aa53a38b1b3b4f0259d7bc.diff

LOG: NFC: update of ARM llvm regr test, follow up of 9633fc14aef7ee3da3b.

Added: 
    

Modified: 
    llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
index 3bd65751c283..1b3a0a065507 100644

--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
@@ -14,7 +14,7 @@ target triple = "thumbv8.1m.main-arm-unknown-eabihf"
 ;
 define dso_local void @sgt_loopguard(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) local_unnamed_addr #0 {
 ; COMMON-LABEL: @sgt_loopguard(
-; DEFAULT:      vector.body:
+; COMMON:       vector.body:
 ; CHECK-TF:     masked.load
 ; CHECK-TF:     masked.load
 ; CHECK-TF:     masked.store
@@ -52,7 +52,7 @@ while.end:
 ;
 define dso_local void @sgt_no_loopguard(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) local_unnamed_addr #0 {
 ; COMMON-LABEL: @sgt_no_loopguard(
-; DEFAULT:      vector.body:
+; COMMON:       vector.body:
 ;
 ; FIXME: I think this is currently miscompiled after D77635
 ;
@@ -87,7 +87,7 @@ while.end:
 
 define dso_local void @sgt_extra_use_cmp(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) local_unnamed_addr #0 {
 ; COMMON-LABEL: @sgt_extra_use_cmp(
-; DEFAULT:      vector.body:
+; COMMON:       vector.body:
 ; CHECK-TF:     masked.load
 ; CHECK-TF:     masked.load
 ; CHECK-TF:     masked.store
@@ -121,12 +121,9 @@ while.end:
 define dso_local void @sgt_const_tripcount(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) local_unnamed_addr #0 {
 ; COMMON-LABEL: @sgt_const_tripcount(
 ; COMMON:       vector.body:
-;
-; FIXME: we expect tail-folding here, but is currently not happening:
-;
-; CHECK-TF-NOT: masked.load
-; CHECK-TF-NOT: masked.load
-; CHECK-TF-NOT: masked.store
+; CHECK-TF:     masked.load
+; CHECK-TF:     masked.load
+; CHECK-TF:     masked.store
 entry:
   %cmp5 = icmp sgt i32 %N, 0
   br i1 %cmp5, label %while.body.preheader, label %while.end
@@ -135,7 +132,7 @@ while.body.preheader:
   br label %while.body
 
 while.body:
-  %N.addr.09 = phi i32 [ %dec, %while.body ], [ 2048, %while.body.preheader ]
+  %N.addr.09 = phi i32 [ %dec, %while.body ], [ 2049, %while.body.preheader ]
   %c.addr.08 = phi i8* [ %incdec.ptr4, %while.body ], [ %c, %while.body.preheader ]
   %b.addr.07 = phi i8* [ %incdec.ptr1, %while.body ], [ %b, %while.body.preheader ]
   %a.addr.06 = phi i8* [ %incdec.ptr, %while.body ], [ %a, %while.body.preheader ]
@@ -159,7 +156,7 @@ while.end:
 
 define dso_local void @sgt_no_guard_0_startval(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) local_unnamed_addr #0 {
 ; COMMON-LABEL: @sgt_no_guard_0_startval(
-; CHECK-NOT:   vector.body:
+; COMMON-NOT:   vector.body:
 entry:
   br label %while.body
 
@@ -223,8 +220,8 @@ while.end:
 }
 
 define dso_local void @sgt_step_not_constant(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N, i32 %S) local_unnamed_addr #0 {
-; COMMON-LABEL:  @sgt_step_not_constant(
-; CHECK-NOT:    vector.body:
+; COMMON-LABEL: @sgt_step_not_constant(
+; COMMON-NOT:   vector.body:
 entry:
   %cmp5 = icmp sgt i32 %N, 0
   br i1 %cmp5, label %while.body.preheader, label %while.end
@@ -257,7 +254,7 @@ while.end:
 
 define dso_local void @icmp_eq(i8* noalias nocapture readonly %A, i8* noalias nocapture readonly %B, i8* noalias nocapture %C, i32 %N) #0 {
 ; COMMON-LABEL: @icmp_eq
-; DEFAULT:      vector.body:
+; COMMON:       vector.body:
 ; TODO
 entry:
   %cmp6 = icmp eq i32 %N, 0
@@ -292,19 +289,26 @@ while.end:
 ; This IR corresponds to this type of C-code:
 ;
 ;  void f(char *a, char *b, char * __restrict c, int N) {
+;    #pragma clang loop vectorize_width(16)
 ;    for (int i = N; i>0; i--)
 ;      c[i] = a[i] + b[i];
 ;  }
 ;
 define dso_local void @sgt_for_loop(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) local_unnamed_addr #0 {
 ; COMMON-LABEL: @sgt_for_loop(
+; COMMON:       vector.body:
+; CHECK-PREFER: masked.load
+; CHECK-PREFER: masked.load
+; CHECK-PREFER: masked.store
 ;
-; FIXME: we do want to support this case too, but is currently not recognised.
+; TODO: if tail-predication is requested, tail-folding isn't triggered because
+; the profitability check returns "Different strides found, can't tail-predicate",
+; investigate this.
+;
+; CHECK-ENABLE-TP-NOT: masked.load
+; CHECK-ENABLE-TP-NOT: masked.load
+; CHECK-ENABLE-TP-NOT: masked.store
 ;
-; DEFAULT-NOT:  vector.body:
-; CHECK-TF-NOT: masked.load
-; CHECK-TF-NOT: masked.load
-; CHECK-TF-NOT: masked.store
 entry:
   %cmp5 = icmp sgt i32 %N, 0
   br i1 %cmp5, label %for.body.preheader, label %for.end
@@ -323,7 +327,7 @@ for.body:
   store i8 %add, i8* %arrayidx4, align 1
   %dec = add nsw i32 %i.011, -1
   %cmp = icmp sgt i32 %i.011, 1
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1
 
 for.end:
   ret void
@@ -331,16 +335,15 @@ for.end:
 
 define dso_local void @sgt_for_loop_i64(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) local_unnamed_addr #0 {
 ; COMMON-LABEL: @sgt_for_loop_i64(
-; DEFAULT-NOT:  vector.body:
+; COMMON:       vector.body:
 ;
-; FIXME: this shoud be supported, but isn't currently.
-; CHECK-PREFER-NOT: masked.load
-; CHECK-PREFER-NOT: masked.load
-; CHECK-PREFER-NOT: masked.store
+; CHECK-PREFER: masked.load
+; CHECK-PREFER: masked.load
+; CHECK-PREFER: masked.store
 ;
-; With -disable-mve-tail-predication=false, the cost-model returns that
-; creating a hardwareloop is not profitable/possible, so here we don't
-; expect the tail-folding:
+; With -disable-mve-tail-predication=false, the target hook returns
+; "preferPredicateOverEpilogue: hardware-loop is not profitable."
+; so here we don't expect the tail-folding. TODO: look into this.
 ;
 ; CHECK-ENABLE-TP-NOT:  masked.load
 ; CHECK-ENABLE-TP-NOT:  masked.load
@@ -372,7 +375,7 @@ for.body:
   store i8 %add, i8* %arrayidx8, align 1
   %dec = add nsw i64 %i.015, -1
   %cmp = icmp sgt i64 %i.015, 1
-  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !1
 }
 
 ; This IR corresponds to this nested-loop:
@@ -430,3 +433,6 @@ for.body4:                                        ; preds = %for.body, %for.body
 }
 
 attributes #0 = { nofree norecurse nounwind "target-features"="+armv8.1-m.main,+mve.fp" }
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.width", i32 16}