[llvm] 9b830c7 - [AArch64][SVE] Teach cost model masked gathers/scatters are cheap

Matthew Devereau via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 26 03:18:31 PDT 2021


Author: Matthew Devereau
Date: 2021-08-26T11:17:47+01:00
New Revision: 9b830c798ebf2695772e8eaaeed3cd24fa66b68b

URL: https://github.com/llvm/llvm-project/commit/9b830c798ebf2695772e8eaaeed3cd24fa66b68b
DIFF: https://github.com/llvm/llvm-project/commit/9b830c798ebf2695772e8eaaeed3cd24fa66b68b.diff

LOG: [AArch64][SVE] Teach cost model masked gathers/scatters are cheap

Tell the cost model to use the scalable calculation for non-neon fixed vector.
This results in a cheaper cost for fixed-length SVE masked gathers/scatters
allowing the vectorizor to emit them more frequently.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b479b4808ec2a..367c980930cd8 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1569,8 +1569,7 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
 InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
     unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
     Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
-
-  if (!isa<ScalableVectorType>(DataTy))
+  if (useNeonVector(DataTy))
     return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                          Alignment, CostKind, I);
   auto *VT = cast<VectorType>(DataTy);

diff  --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
index 2adc51628a4c6..fa51efd0a87c9 100644
--- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -92,8 +92,8 @@ define <4 x i8> @gather_load_4xi8_constant_mask(<4 x i8*> %ptrs) {
 ; CHECK:         gather_load_4xi8_constant_mask
 ; CHECK-NEON:    Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
 ;
   %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
   ret <4 x i8> %lv
@@ -103,8 +103,8 @@ define <4 x i8> @gather_load_4xi8_variable_mask(<4 x i8*> %ptrs, <4 x i1> %cond)
 ; CHECK:         gather_load_4xi8_variable_mask
 ; CHECK-NEON:    Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
 ;
   %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
   ret <4 x i8> %lv
@@ -115,8 +115,8 @@ define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x i8*> %ptrs) {
 ; CHECK:         scatter_store_4xi8_constant_mask
 ; CHECK-NEON:    Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
 ;
   call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
   ret void
@@ -126,8 +126,8 @@ define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x i8*> %ptrs, <4
 ; CHECK:         scatter_store_4xi8_variable_mask
 ; CHECK-NEON:    Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0i8(
 ;
   call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %cond)
   ret void
@@ -138,8 +138,8 @@ define <4 x i32> @gather_load_4xi32_constant_mask(<4 x i32*> %ptrs) {
 ; CHECK:         gather_load_4xi32_constant_mask
 ; CHECK-NEON:    Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
 ;
   %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
   ret <4 x i32> %lv
@@ -149,8 +149,8 @@ define <4 x i32> @gather_load_4xi32_variable_mask(<4 x i32*> %ptrs, <4 x i1> %co
 ; CHECK:         gather_load_4xi32_variable_mask
 ; CHECK-NEON:    Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
 ;
   %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
   ret <4 x i32> %lv
@@ -161,8 +161,8 @@ define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x i32*> %ptrs)
 ; CHECK:         scatter_store_4xi32_constant_mask
 ; CHECK-NEON:    Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
 ;
   call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
   ret void
@@ -172,9 +172,56 @@ define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x i32*> %ptrs,
 ; CHECK:         scatter_store_4xi32_variable_mask
 ; CHECK-NEON:    Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0i32(
 ;
   call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> %cond)
   ret void
 }
+
+declare <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*>, i32, <256 x i1>, <256 x i16>)
+define void @sve_gather_vls(<256 x i1> %v256i1mask) {
+; CHECK-LABEL: 'sve_scatter_vls'
+; CHECK-NEON: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+entry:
+  %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+  ret void
+}
+
+declare <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*>, i32, <256 x i1>, <256 x float>)
+define void @sve_gather_vls_float(<256 x i1> %v256i1mask) {
+; CHECK-LABEL: 'sve_gather_vls_float'
+; CHECK-NEON: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+entry:
+  %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+  ret void
+}
+
+declare void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8>, <256 x i8*>, i32, <256 x i1>)
+define void @sve_scatter_vls(<256 x i1> %v256i1mask){
+; CHECK-LABEL: 'sve_scatter_vls'
+; CHECK-NEON: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
+entry:
+  call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
+  ret void
+}
+
+declare void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half>, <512 x half*>, i32, <512 x i1>)
+define void @sve_scatter_vls_float(<512 x i1> %v512i1mask){
+; CHECK-LABEL: 'sve_scatter_vls_float'
+; CHECK-NEON: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 512 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 512 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
+  call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
+  ret void
+}
\ No newline at end of file


        


More information about the llvm-commits mailing list