[llvm] b1aba03 - [ARM] Enable MVE masked loads and stores

Mon Dec 9 03:38:12 PST 2019

Author: David Green
Date: 2019-12-09T11:37:34Z
New Revision: b1aba0378e52be51cfb7fb6f03417ebf408d66cc

URL: https://github.com/llvm/llvm-project/commit/b1aba0378e52be51cfb7fb6f03417ebf408d66cc
DIFF: https://github.com/llvm/llvm-project/commit/b1aba0378e52be51cfb7fb6f03417ebf408d66cc.diff

LOG: [ARM] Enable MVE masked loads and stores

With the extra optimisations we have done, these should now be fine to
enable by default, which is what this patch does.

Differential Revision: https://reviews.llvm.org/D70968

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
    llvm/test/CodeGen/Thumb2/mve-intrinsics/load-store.ll
    llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll
    llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll
    llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll
    llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
    llvm/test/CodeGen/Thumb2/mve-masked-load.ll
    llvm/test/CodeGen/Thumb2/mve-masked-store.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
    llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 6b810b484bb3..16b5ad34bf22 100644

--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
 #define DEBUG_TYPE "armtti"
 
 static cl::opt<bool> EnableMaskedLoadStores(
-  "enable-arm-maskedldst", cl::Hidden, cl::init(false),
+  "enable-arm-maskedldst", cl::Hidden, cl::init(true),
   cl::desc("Enable the generation of masked loads and stores"));
 
 static cl::opt<bool> DisableLowOverheadLoops(

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
index 0b50b9a1db4e..c2bc193530f6 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -enable-arm-maskedldst=true -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
 
 ; CHECK-LABEL: vpsel_mul_reduce_add
 ; CHECK:      dls lr, lr

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
index f7c9236c6e62..6b226c4c8605 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16 -enable-arm-maskedldst=true -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16 -disable-mve-tail-predication=false %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) {
 ; CHECK-LABEL: fast_float_mul:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index 23c447284293..b41068eba74e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false -enable-arm-maskedldst=true %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: test_acc_scalar_char:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
index 04f408d78acb..0b9ab2404873 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -enable-arm-maskedldst=true -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
 
 define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: mul_reduce_add:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/load-store.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/load-store.ll
index 9780b0864cc7..6b5a1dd2bf21 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/load-store.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/load-store.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -enable-arm-maskedldst -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
 
 define arm_aapcs_vfpcc <8 x half> @test_vld1q_f16(half* %base) {
 ; CHECK-LABEL: test_vld1q_f16:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll
index ba3ef58c3c2e..5e2e85017e23 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
-; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
 ; CHECK-LABEL: ldrwu32_4:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll
index 0951589eaa14..f80372209e3f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
-; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
 ; CHECK-LABEL: ldrwu32_4:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll
index beb5aae63411..424e16d22dc9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
-; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
 ; CHECK-LABEL: ldrwu32_4:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
index 100a082fd12b..b92163bcbd32 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
-; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
 ; CHECK-LABEL: foo_v4i32_v4i32:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
index 54a94b8981c2..5bddc5ed8333 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
-; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_zero(<4 x i32> *%dest, <4 x i32> %a) {
 ; CHECK-LE-LABEL: masked_v4i32_align4_zero:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
index 425162721acf..7e320626dd51 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
-; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define arm_aapcs_vfpcc void @masked_v4i32(<4 x i32> *%dest, <4 x i32> %a) {
 ; CHECK-LE-LABEL: masked_v4i32:

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
index 5d30aba67825..6b036c3b1787 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s
+; RUN: opt -loop-vectorize < %s -S -o - | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1-m.main-none-eabi"

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
index a57a5ad0eba2..e5dcc6773f0f 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
@@ -1,5 +1,5 @@
-; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s --check-prefix=CHECK
-; RUN: opt -loop-vectorize -enable-arm-maskedldst -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
+; RUN: opt -loop-vectorize < %s -S -o - | FileCheck %s --check-prefix=CHECK
+; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
index ac2b65036fe0..32c08efd06f7 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf \
 ; RUN:   -disable-mve-tail-predication=false -loop-vectorize -S < %s | \
-; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
+; RUN:   FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=-mve \
 ; RUN:   -disable-mve-tail-predication=false -loop-vectorize \

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
index af755c887428..72321f0401a0 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
@@ -1,7 +1,7 @@
-; RUN: opt < %s -loop-vectorize -enable-arm-maskedldst -S | \
+; RUN: opt < %s -loop-vectorize -S | \
 ; RUN:  FileCheck %s -check-prefixes=COMMON,CHECK
 
-; RUN: opt < %s -loop-vectorize -enable-arm-maskedldst -prefer-predicate-over-epilog -S | \
+; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilog -S | \
 ; RUN:   FileCheck -check-prefixes=COMMON,PREDFLAG %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"