[llvm] f008b5b - [ARM] Additional tests and minor formatting. NFC

Mon Dec 9 02:25:28 PST 2019

Author: David Green
Date: 2019-12-09T10:24:33Z
New Revision: f008b5b8ce724d60f0f0eeafceee0119c42022d4

URL: https://github.com/llvm/llvm-project/commit/f008b5b8ce724d60f0f0eeafceee0119c42022d4
DIFF: https://github.com/llvm/llvm-project/commit/f008b5b8ce724d60f0f0eeafceee0119c42022d4.diff

LOG: [ARM] Additional tests and minor formatting. NFC

This adds some extra cost model tests for shifts, and does some minor
adjustments to some Neon code to make it clear as to what it applies to.
Both NFC.

Added: 
    llvm/test/Analysis/CostModel/ARM/freeshift.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll

Modified: 
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 10faeb75b337..f7443c8577a8 100644

--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -650,50 +650,50 @@ int ARMTTIImpl::getArithmeticInstrCost(
   int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
 
-  const unsigned FunctionCallDivCost = 20;
-  const unsigned ReciprocalDivCost = 10;
-  static const CostTblEntry CostTbl[] = {
-    // Division.
-    // These costs are somewhat random. Choose a cost of 20 to indicate that
-    // vectorizing devision (added function call) is going to be very expensive.
-    // Double registers types.
-    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
-    { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
-    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
-    { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
-    { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
-    // Quad register types.
-    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
-    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
-    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
-    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
-    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
-    // Multiplication.
-  };
-
   if (ST->hasNEON()) {
+    const unsigned FunctionCallDivCost = 20;
+    const unsigned ReciprocalDivCost = 10;
+    static const CostTblEntry CostTbl[] = {
+      // Division.
+      // These costs are somewhat random. Choose a cost of 20 to indicate that
+      // vectorizing devision (added function call) is going to be very expensive.
+      // Double registers types.
+      { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
+      { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
+      { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
+      { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
+      { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
+      // Quad register types.
+      { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
+      { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+      { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+      { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
+      { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
+      // Multiplication.
+    };
+
     if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
       return LT.first * Entry->Cost;
 

diff  --git a/llvm/test/Analysis/CostModel/ARM/freeshift.ll b/llvm/test/Analysis/CostModel/ARM/freeshift.ll
new file mode 100644
index 000000000000..8e48a7f7cf4d
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/freeshift.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s
+
+define void @shl(i32 %a, i32 %b) {
+; CHECK-LABEL: 'shl'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %as = shl i32 %a, 3
+  %ac = add i32 %b, %as
+  %ss = shl i32 %a, 3
+  %sc = sub i32 %b, %ss
+  %xs = shl i32 %a, 3
+  %xc = xor i32 %b, %xs
+  %ns = shl i32 %a, 3
+  %nc = and i32 %b, %ns
+  %os = shl i32 %a, 3
+  %oc = or i32 %b, %os
+  %is = shl i32 %a, 3
+  %ic = icmp eq i32 %b, %is
+  ret void
+}
+
+define void @ashr(i32 %a, i32 %b) {
+; CHECK-LABEL: 'ashr'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %as = ashr i32 %a, 3
+  %ac = add i32 %b, %as
+  %ss = ashr i32 %a, 3
+  %sc = sub i32 %b, %ss
+  %xs = ashr i32 %a, 3
+  %xc = xor i32 %b, %xs
+  %ns = ashr i32 %a, 3
+  %nc = and i32 %b, %ns
+  %os = ashr i32 %a, 3
+  %oc = or i32 %b, %os
+  %is = ashr i32 %a, 3
+  %ic = icmp eq i32 %b, %is
+  ret void
+}
+
+define void @lshr(i32 %a, i32 %b) {
+; CHECK-LABEL: 'lshr'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %as = lshr i32 %a, 3
+  %ac = add i32 %b, %as
+  %ss = lshr i32 %a, 3
+  %sc = sub i32 %b, %ss
+  %xs = lshr i32 %a, 3
+  %xc = xor i32 %b, %xs
+  %ns = lshr i32 %a, 3
+  %nc = and i32 %b, %ns
+  %os = lshr i32 %a, 3
+  %oc = or i32 %b, %os
+  %is = lshr i32 %a, 3
+  %ic = icmp eq i32 %b, %is
+  ret void
+}
+

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
new file mode 100644
index 000000000000..858d3d88099e
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
@@ -0,0 +1,86 @@
+; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s --check-prefix=CHECK
+; RUN: opt -loop-vectorize -enable-arm-maskedldst -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8.1m.main-arm-none-eabi"
+
+; CHECK-LABEL: test
+; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction:   %and515 = shl i32 %l41, 3
+; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction:   %l45 = and i32 %and515, 131072
+; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction:   %and515 = shl i32 %l41, 3
+; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction:   %l45 = and i32 %and515, 131072
+; CHECK: vector.body
+
+define void @test([101 x i32] *%src, i32 %N) #0 {
+entry:
+  br label %for.body386
+  
+for.body386:                                      ; preds = %entry, %l77
+  %add387 = phi i32 [ %inc532, %l77 ], [ 0, %entry ]
+  %arrayidx388 = getelementptr inbounds [101 x i32], [101 x i32]* %src, i32 0, i32 %add387
+  %l41 = load i32, i32* %arrayidx388, align 4
+  %l42 = and i32 %l41, 65535
+  %l43 = icmp eq i32 %l42, 0
+  br i1 %l43, label %l77, label %l44
+
+l44:                                               ; preds = %for.body386
+  %and515 = shl i32 %l41, 3
+  %l45 = and i32 %and515, 131072
+  %and506 = shl i32 %l41, 5
+  %l46 = and i32 %and506, 262144
+  %and497 = shl i32 %l41, 7
+  %l47 = and i32 %and497, 524288
+  %and488 = shl i32 %l41, 9
+  %l48 = and i32 %and488, 1048576
+  %and479 = shl i32 %l41, 11
+  %l49 = and i32 %and479, 2097152
+  %and470 = shl i32 %l41, 13
+  %l50 = and i32 %and470, 4194304
+  %and461 = shl i32 %l41, 15
+  %l51 = and i32 %and461, 8388608
+  %and452 = shl i32 %l41, 17
+  %l52 = and i32 %and452, 16777216
+  %and443 = shl i32 %l41, 19
+  %l53 = and i32 %and443, 33554432
+  %and434 = shl i32 %l41, 21
+  %l54 = and i32 %and434, 67108864
+  %and425 = shl i32 %l41, 23
+  %l55 = and i32 %and425, 134217728
+  %and416 = shl i32 %l41, 25
+  %l56 = and i32 %and416, 268435456
+  %and407 = shl i32 %l41, 27
+  %l57 = and i32 %and407, 536870912
+  %and398 = shl i32 %l41, 29
+  %l58 = and i32 %and398, 1073741824
+  %l59 = shl i32 %l41, 31
+  %l60 = or i32 %l59, %l41
+  %l61 = or i32 %l58, %l60
+  %l62 = or i32 %l57, %l61
+  %l63 = or i32 %l56, %l62
+  %l64 = or i32 %l55, %l63
+  %l65 = or i32 %l54, %l64
+  %l66 = or i32 %l53, %l65
+  %l67 = or i32 %l52, %l66
+  %l68 = or i32 %l51, %l67
+  %l69 = or i32 %l50, %l68
+  %l70 = or i32 %l49, %l69
+  %l71 = or i32 %l48, %l70
+  %l72 = or i32 %l47, %l71
+  %l73 = or i32 %l46, %l72
+  %l74 = or i32 %l45, %l73
+  %and524 = shl i32 %l41, 1
+  %l75 = and i32 %and524, 65536
+  %l76 = or i32 %l75, %l74
+  store i32 %l76, i32* %arrayidx388, align 4
+  br label %l77
+
+l77:                                               ; preds = %for.body386, %l44
+  %inc532 = add nuw nsw i32 %add387, 1
+  %exitcond649 = icmp eq i32 %inc532, %N
+  br i1 %exitcond649, label %exit, label %for.body386
+
+exit:
+  ret void
+}
+
+attributes #0 = { nounwind "min-legal-vector-width"="0" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "use-soft-float"="false" }