[llvm] 595270a - [ARM][MVE] Refactor option -disable-mve-tail-predication

Mon Jul 13 05:41:20 PDT 2020

Author: Sjoerd Meijer
Date: 2020-07-13T13:40:33+01:00
New Revision: 595270ae39671eed49b75983beeab13de74a342b

URL: https://github.com/llvm/llvm-project/commit/595270ae39671eed49b75983beeab13de74a342b
DIFF: https://github.com/llvm/llvm-project/commit/595270ae39671eed49b75983beeab13de74a342b.diff

LOG: [ARM][MVE] Refactor option -disable-mve-tail-predication

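This refactors option -disable-mve-tail-predication into a new option
-tail-predication=<mode> that takes different arguments, so that we have 1
option to control tail-predication rather than several different ones. The
accepted modes are: disabled (the default), enabled-no-reductions, enabled,
force-enabled-no-reductions and force-enabled. The force-* modes replace the
removed option -force-mve-tail-predication.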

This is also a prep step for D82953, in which we want to reject tail-predication
of reduction loops unless reductions are explicitly requested with this option.

Differential Revision: https://reviews.llvm.org/D83133

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/lib/Target/ARM/ARMTargetTransformInfo.h
    llvm/lib/Target/ARM/MVETailPredication.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll
    llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
    llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 74b1331216a0..575e6171059d 100644

--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -45,7 +45,7 @@ static cl::opt<bool> DisableLowOverheadLoops(
   "disable-arm-loloops", cl::Hidden, cl::init(false),
   cl::desc("Disable the generation of low-overhead loops"));
 
-extern cl::opt<bool> DisableTailPredication;
+extern cl::opt<TailPredication::Mode> EnableTailPredication;
 
 extern cl::opt<bool> EnableMaskedGatherScatters;
 
@@ -1458,7 +1458,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                              TargetLibraryInfo *TLI,
                                              DominatorTree *DT,
                                              const LoopAccessInfo *LAI) {
-  if (DisableTailPredication)
+  if (!EnableTailPredication)
     return false;
 
   // Creating a predicated vector loop is the first step for generating a
@@ -1501,7 +1501,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
 }
 
 bool ARMTTIImpl::emitGetActiveLaneMask() const {
-  if (!ST->hasMVEIntegerOps() || DisableTailPredication)
+  if (!ST->hasMVEIntegerOps() || !EnableTailPredication)
     return false;
 
   // Intrinsic @llvm.get.active.lane.mask is supported.

diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 537a546361ee..7bf6de4bffe0 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -38,6 +38,16 @@ class ScalarEvolution;
 class Type;
 class Value;
 
+namespace TailPredication {
+  enum Mode {
+    Disabled = 0,
+    EnabledNoReductions,
+    Enabled,
+    ForceEnabledNoReductions,
+    ForceEnabled
+  };
+}
+
 class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
   using BaseT = BasicTTIImplBase<ARMTTIImpl>;
   using TTI = TargetTransformInfo;

diff  --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index 6583dcb77e1e..5bf3522ab2e6 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -42,6 +42,7 @@
 
 #include "ARM.h"
 #include "ARMSubtarget.h"
+#include "ARMTargetTransformInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -64,16 +65,27 @@ using namespace llvm;
 #define DEBUG_TYPE "mve-tail-predication"
 #define DESC "Transform predicated vector loops to use MVE tail predication"
 
-static cl::opt<bool>
-ForceTailPredication("force-mve-tail-predication", cl::Hidden, cl::init(false),
-                     cl::desc("Force MVE tail-predication even if it might be "
-                              "unsafe (e.g. possible overflow in loop "
-                              "counters)"));
+cl::opt<TailPredication::Mode> EnableTailPredication(
+   "tail-predication", cl::desc("MVE tail-predication options"),
+   cl::init(TailPredication::Disabled),
+   cl::values(clEnumValN(TailPredication::Disabled, "disabled",
+                         "Don't tail-predicate loops"),
+              clEnumValN(TailPredication::EnabledNoReductions,
+                         "enabled-no-reductions",
+                         "Enable tail-predication, but not for reduction loops"),
+              clEnumValN(TailPredication::Enabled,
+                         "enabled",
+                         "Enable tail-predication, including reduction loops"),
+              clEnumValN(TailPredication::ForceEnabledNoReductions,
+                         "force-enabled-no-reductions",
+                         "Enable tail-predication, but not for reduction loops, "
+                         "and force this which might be unsafe"),
+              clEnumValN(TailPredication::ForceEnabled,
+                         "force-enabled",
+                         "Enable tail-predication, including reduction loops, "
+                         "and force this which might be unsafe")));
+
 
-cl::opt<bool>
-DisableTailPredication("disable-mve-tail-predication", cl::Hidden,
-                       cl::init(true),
-                       cl::desc("Disable MVE Tail Predication"));
 namespace {
 
 class MVETailPredication : public LoopPass {
@@ -146,7 +158,7 @@ static bool IsMasked(Instruction *I) {
 }
 
 bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
-  if (skipLoop(L) || DisableTailPredication)
+  if (skipLoop(L) || !EnableTailPredication)
     return false;
 
   MaskedInsts.clear();
@@ -346,6 +358,9 @@ static void Cleanup(SetVector<Instruction*> &MaybeDead, Loop *L) {
 //    vector width.
 bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
     Value *TripCount, FixedVectorType *VecTy) {
+  bool ForceTailPredication =
+    EnableTailPredication == TailPredication::ForceEnabledNoReductions ||
+    EnableTailPredication == TailPredication::ForceEnabled;
   // 1) Test whether entry to the loop is protected by a conditional
   // BTC + 1 < 0. In other words, if the scalar trip count overflows,
   // becomes negative, we shouldn't enter the loop and creating

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
index a00af0d6a9ec..5fced6ad29e2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; CHECK-LABEL: mul_v16i8
 ; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll
index dab642b94be0..56343a6d65cb 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -disable-mve-tail-predication=false %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -tail-predication=enabled %s -S -o - | FileCheck %s
 
 define hidden i32 @_Z4loopPiPjiS0_i(i32* noalias nocapture readonly %s1, i32* noalias nocapture readonly %s2, i32 %x, i32* noalias nocapture %d, i32 %n) {
 ; CHECK-LABEL: @_Z4loopPiPjiS0_i(

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
index bf6e92a1c883..e98276e258ab 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s
 
 define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c, i32 %N) {
 ; CHECK-LABEL: vpsel_mul_reduce_add:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll
index 8d201a23a689..1fda5c08a037 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
 
 define dso_local arm_aapcs_vfpcc void @sext_i8(i16* noalias nocapture %a, i8* nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: sext_i8:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
index 8c18159c24c5..d8d6af3b9a8d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -tail-predication=enabled %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) {
 ; CHECK-LABEL: fast_float_mul:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll
index bc2c7e084ea7..fddbfa8b6620 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O3 -disable-mve-tail-predication=false -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s
+; RUN: llc -O3 -tail-predication=enabled -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m-arm-none-eabi"

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index 6c1273db3f80..428c703dd341 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: test_acc_scalar_char:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
index 64702cc3c315..548ba396bed4 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -tail-predication=enabled %s -o - | FileCheck %s
 
 define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readonly %B, i32* noalias nocapture %C, i32 %N) {
 ; CHECK-LABEL: @mat_vec_sext_i16(

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index 12c3ca0525f2..66601dd66cb2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
 
 define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
 ; CHECK-LABEL: one_loop_add_add_v16i8:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
index 13d750310a56..065e534dd55b 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
 
 define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 ; CHECK-LABEL: @foo(

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll
index d405657f4d17..e9facfda6133 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
 define arm_aapcs_vfpcc void @uadd_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) {
 ; CHECK-LABEL: uadd_sat:
 ; CHECK:       @ %bb.0: @ %entry

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll
index d3247a3fd28e..87f23adf7ffa 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
 define arm_aapcs_vfpcc void @fabs(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %blockSize) {
 ; CHECK-LABEL: fabs:
 ; CHECK:       @ %bb.0: @ %entry

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
index 962e9df3dc1e..e72e81da7e7c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
 define arm_aapcs_vfpcc void @round(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %n) #0 {
 ; CHECK-LABEL: round:
 ; CHECK:       @ %bb.0: @ %entry

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
index 790311a54aa1..3c7ae4dc734a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
 define arm_aapcs_vfpcc void @usub_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) {
 ; CHECK-LABEL: usub_sat:
 ; CHECK:       @ %bb.0: @ %entry

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll
index 8c1534be77db..52cd8fdc6d79 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; TODO: We should be able to generate a vctp for the loads.
 ; CHECK-LABEL: trunc_v4i32_v4i16

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll
index 1926bbeeaa70..8e46e3385385 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; The following functions should all fail to become tail-predicated.
 ; CHECK-NOT: call i32 @llvm.arm.vctp

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
index 3a9d3d117126..b40b36ced4af 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; CHECK-LABEL: expand_v8i16_v8i32
 ; CHECK-NOT: call i32 @llvm.arm.mve.vctp

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
index 5c753134744d..f3055bc8a575 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
@@ -1,6 +1,6 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false \
-; RUN:    -force-mve-tail-predication -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=force-enabled \
+; RUN:    -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE
 
 ; CHECK-LABEL: reduction_i32
 ; CHECK: phi i32 [ 0, %vector.ph ]

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
index f1242db36485..4db17c074643 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
 
 define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr {
 ; CHECK-LABEL: varying_outer_2d_reduction:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
index 26a570ac4c29..615334300c28 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s
 
 define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: mul_reduce_add:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
index aaeae75e072f..e10cc3153b9c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
@@ -1,5 +1,5 @@
 
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
 
 ; CHECK-LABEL: vec_mul_reduce_add
 

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll
index 19d9c89dabca..f1a35af8b57e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; TODO: The unrolled pattern is preventing the transform
 ; CHECK-LABEL: mul_v16i8_unroll

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
index 0ba224415b67..306f31be27f9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc void @fmas1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
 ; CHECK-LABEL: fmas1:

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
index ff3e03c7bad4..ac6bb56ff5f8 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
@@ -1,19 +1,19 @@
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize -S < %s | \
+; RUN:   -tail-predication=enabled -loop-vectorize -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=-mve \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=false -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve \
-; RUN:   -disable-mve-tail-predication=true -loop-vectorize \
+; RUN:   -tail-predication=disabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
@@ -21,24 +21,24 @@
 ; 'isHardwareLoopProfitable' return false, so that we test avoiding folding for
 ; these cases.
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve,-lob \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \
 ; RUN:   -prefer-predicate-over-epilog=false \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \
 ; RUN:   -prefer-predicate-over-epilog=true \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,FOLDING-OPT
 

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
index 23ecf5b6015c..8a327f163b12 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-vectorize -S | FileCheck %s --check-prefixes=COMMON,DEFAULT
-; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false  -prefer-predicate-over-epilog -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER
-; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-ENABLE-TP
+; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilog -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER
+; RUN: opt < %s -loop-vectorize -tail-predication=enabled -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-ENABLE-TP
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-unknown-eabihf"

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
index f3e1af6f14ff..eda3c115c0f6 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
@@ -1,7 +1,7 @@
-; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -S | \
+; RUN: opt < %s -loop-vectorize -tail-predication=enabled -S | \
 ; RUN:  FileCheck %s -check-prefixes=COMMON,CHECK
 
-; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -prefer-predicate-over-epilog -S | \
+; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilog -S | \
 ; RUN:   FileCheck -check-prefixes=COMMON,PREDFLAG %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"