[llvm] 3a1c6ce - [AArch64] Add tests for masked.gather costs.

Mon Nov 23 09:36:06 PST 2020

Author: Florian Hahn
Date: 2020-11-23T17:33:27Z
New Revision: 3a1c6cec15e32e4aa5593ce624915bda790dadff

URL: https://github.com/llvm/llvm-project/commit/3a1c6cec15e32e4aa5593ce624915bda790dadff
DIFF: https://github.com/llvm/llvm-project/commit/3a1c6cec15e32e4aa5593ce624915bda790dadff.diff

LOG: [AArch64] Add tests for masked.gather costs.

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
index 3a4e0f080a49..300339313f06 100644

--- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -86,3 +86,27 @@ define <8 x i64> @load_512(<8 x i64>* %ptr) {
   %out = load <8 x i64>, <8 x i64>* %ptr
   ret <8 x i64> %out
 }
+
+define <4 x i8> @gather_load_4xi8(<4 x i8*> %ptrs) {
+; CHECK:         gather_load_4xi8
+; CHECK-NEON:    Cost Model: Found an estimated cost of 1 for instruction:   %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:   %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:   %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:   %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+;
+  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+  ret <4 x i8> %lv
+}
+declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>)
+
+define <4 x i32> @gather_load_4xi32(<4 x i32*> %ptrs) {
+; CHECK:         gather_load_4xi32
+; CHECK-NEON:    Cost Model: Found an estimated cost of 1 for instruction:   %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:   %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:   %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:   %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+;
+  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+  ret <4 x i32> %lv
+}
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32 immarg, <4 x i1>, <4 x i32>)

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index 4c2d51b188e6..021f9d5d9419 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -slp-vectorizer -instcombine -pass-remarks-output=%t | FileCheck %s
 ; RUN: cat %t | FileCheck -check-prefix=REMARK %s
-; RUN: opt < %s -S -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
+; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
 ; RUN: cat %t | FileCheck -check-prefix=REMARK %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -12,6 +12,11 @@ target triple = "aarch64--linux-gnu"
 ; REMARK-NEXT:    - String: 'Vectorized horizontal reduction with cost '
 ; REMARK-NEXT:    - Cost: '-7'
 ;
+; REMARK-LABEL: Function: gather_load
+; REMARK:       Args:
+; REMARK-NEXT:    - String: 'Stores SLP vectorized with cost
+; REMARK-NEXT:    - Cost: '-2'
+
 define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: @gather_multiple_use(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
@@ -51,3 +56,41 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
   %tmp22 = add i32 %tmp21, %tmp19
   ret i32 %tmp22
 }
+
+@data = global [6 x [258 x i8]] zeroinitializer, align 1
+define void @gather_load(i16* noalias %ptr) {
+; CHECK-LABEL: @gather_load(
+; CHECK-NEXT:    [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> <i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3)>, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw <4 x i16> [[TMP2]], <i16 10, i16 20, i16 30, i16 40>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX182]] to <4 x i16>*
+; CHECK-NEXT:    store <4 x i16> [[TMP3]], <4 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    ret void
+;
+  %arrayidx182 = getelementptr inbounds i16, i16* %ptr, i64 1
+  %arrayidx183 = getelementptr inbounds i16, i16* %ptr, i64 2
+  %arrayidx184 = getelementptr inbounds i16, i16* %ptr, i64 3
+  %arrayidx185 = getelementptr inbounds i16, i16* %ptr, i64 4
+  %arrayidx149 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0
+  %l0 = load i8, i8* %arrayidx149, align 1
+  %conv150 = zext i8 %l0 to i16
+  %add152 = add i16 10, %conv150
+  %arrayidx155 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1
+  %l1 = load i8, i8* %arrayidx155, align 1
+  %conv156 = zext i8 %l1 to i16
+  %add158 = add i16 20, %conv156
+  %arrayidx161 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2
+  %l2 = load i8, i8* %arrayidx161, align 1
+  %conv162 = zext i8 %l2 to i16
+  %add164 = add i16 30, %conv162
+  %arrayidx167 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3
+  %l3 = load i8, i8* %arrayidx167, align 1
+  %conv168 = zext i8 %l3 to i16
+  %add170 = add i16 40, %conv168
+  store i16 %add152, i16* %arrayidx182, align 2
+  store i16 %add158, i16* %arrayidx183, align 2
+  store i16 %add164, i16* %arrayidx184, align 2
+  store i16 %add170, i16* %arrayidx185, align 2
+  ret void
+}