[llvm] 02bfe2d - [RISCV] Adjust vector immediate store materialization cost

Thu Sep 29 07:37:27 PDT 2022

Author: Philip Reames
Date: 2022-09-29T07:37:13-07:00
New Revision: 02bfe2de7c3eb2f2318fa97bc30b15ea7505a584

URL: https://github.com/llvm/llvm-project/commit/02bfe2de7c3eb2f2318fa97bc30b15ea7505a584
DIFF: https://github.com/llvm/llvm-project/commit/02bfe2de7c3eb2f2318fa97bc30b15ea7505a584.diff

LOG: [RISCV] Adjust vector immediate store materialization cost

This change updates the costs so that constant pool loads match their actual cost, and adds a special case for broadcast (uniform) constants to avoid too many regressions. We really need more information about the constants being materialized, but this is an incremental improvement.

Differential Revision: https://reviews.llvm.org/D134746

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 94852c53fe3c0..110bc88e35d62 100644

--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -666,12 +666,17 @@ InstructionCost RISCVTTIImpl::getStoreImmCost(Type *Ty,
     // currently have here.
     return 0;
 
-  APInt PseudoAddr = APInt::getAllOnes(DL.getPointerSizeInBits());
-  // Add a cost of address load + the cost of the vector load.
-  return RISCVMatInt::getIntMatCost(PseudoAddr, DL.getPointerSizeInBits(),
-                                    getST()->getFeatureBits()) +
-    getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
-                    /*AddressSpace=*/0, CostKind);
+  if (OpInfo.isUniform())
+    // vmv.v.i, vmv.v.x, or vfmv.v.f
+    // We ignore the cost of the scalar constant materialization to be consistent
+    // with how we treat scalar constants themselves just above.
+    return 1;
+
+  // Add a cost of address generation + the cost of the vector load. The
+  // address is expected to be a PC relative offset to a constant pool entry
+  // using auipc/addi.
+  return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
+                             /*AddressSpace=*/0, CostKind);
 }
 
 

diff  --git a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
index c3b43935281a2..caf8a5c73c2fa 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
@@ -297,17 +297,17 @@ define void @store_of_constant(ptr %p) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 1, i32 1, i32 2, i32 1>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 2, i32 1, i32 1, i32 1>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -4>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 2, i32 4, i32 6, i32 8>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 -1, i32 0, i32 2, i32 1>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 256, i32 4096, i32 57, i32 1>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 1, i32 1, i32 2, i32 1>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 2, i32 1, i32 1, i32 1>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -4>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 2, i32 4, i32 6, i32 8>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 -1, i32 0, i32 2, i32 1>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 256, i32 4096, i32 57, i32 1>, ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll
index 324fad5e829be..03e3aa23b98ec 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll
@@ -222,7 +222,13 @@ entry:
 define void @store_stepvector_i32(ptr %dest) {
 ; CHECK-LABEL: @store_stepvector_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    [[INC1:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 1
+; CHECK-NEXT:    store i32 1, ptr [[INC1]], align 2
+; CHECK-NEXT:    [[INC2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 2
+; CHECK-NEXT:    store i32 2, ptr [[INC2]], align 2
+; CHECK-NEXT:    [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3
+; CHECK-NEXT:    store i32 3, ptr [[INC3]], align 2
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @store_stepvector_i32(
@@ -250,7 +256,13 @@ entry:
 define void @store_arbitrary_constant_i32(ptr %dest) {
 ; CHECK-LABEL: @store_arbitrary_constant_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store <4 x i32> <i32 0, i32 -33, i32 44, i32 77>, ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    [[INC1:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 1
+; CHECK-NEXT:    store i32 -33, ptr [[INC1]], align 2
+; CHECK-NEXT:    [[INC2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 2
+; CHECK-NEXT:    store i32 44, ptr [[INC2]], align 2
+; CHECK-NEXT:    [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3
+; CHECK-NEXT:    store i32 77, ptr [[INC3]], align 2
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @store_arbitrary_constant_i32(

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
index 00544c034cfad..347db4c58b63e 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
@@ -14,33 +14,33 @@ define void @foo(i64* nocapture writeonly %da) {
 ; CHECK-128-NEXT:  entry:
 ; CHECK-128-NEXT:    store i64 0, i64* [[DA:%.*]], align 8
 ; CHECK-128-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 1
-; CHECK-128-NEXT:    store i64 1, i64* [[ARRAYIDX1]], align 8
+; CHECK-128-NEXT:    store i64 0, i64* [[ARRAYIDX1]], align 8
 ; CHECK-128-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 2
-; CHECK-128-NEXT:    store i64 2, i64* [[ARRAYIDX2]], align 8
+; CHECK-128-NEXT:    store i64 0, i64* [[ARRAYIDX2]], align 8
 ; CHECK-128-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 3
-; CHECK-128-NEXT:    store i64 3, i64* [[ARRAYIDX3]], align 8
+; CHECK-128-NEXT:    store i64 0, i64* [[ARRAYIDX3]], align 8
 ; CHECK-128-NEXT:    ret void
 ;
 ; CHECK-256-LABEL: @foo(
 ; CHECK-256-NEXT:  entry:
 ; CHECK-256-NEXT:    [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>*
-; CHECK-256-NEXT:    store <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64>* [[TMP0]], align 8
+; CHECK-256-NEXT:    store <4 x i64> zeroinitializer, <4 x i64>* [[TMP0]], align 8
 ; CHECK-256-NEXT:    ret void
 ;
 ; CHECK-512-LABEL: @foo(
 ; CHECK-512-NEXT:  entry:
 ; CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>*
-; CHECK-512-NEXT:    store <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64>* [[TMP0]], align 8
+; CHECK-512-NEXT:    store <4 x i64> zeroinitializer, <4 x i64>* [[TMP0]], align 8
 ; CHECK-512-NEXT:    ret void
 ;
 entry:
   store i64 0, i64* %da, align 8
   %arrayidx1 = getelementptr inbounds i64, i64* %da, i64 1
-  store i64 1, i64* %arrayidx1, align 8
+  store i64 0, i64* %arrayidx1, align 8
   %arrayidx2 = getelementptr inbounds i64, i64* %da, i64 2
-  store i64 2, i64* %arrayidx2, align 8
+  store i64 0, i64* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds i64, i64* %da, i64 3
-  store i64 3, i64* %arrayidx3, align 8
+  store i64 0, i64* %arrayidx3, align 8
   ret void
 }
 
@@ -49,14 +49,14 @@ define void @foo8(i8* nocapture writeonly %da) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i8 0, i8* [[DA:%.*]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 1
-; CHECK-NEXT:    store i8 1, i8* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    store i8 0, i8* [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
   store i8 0, i8* %da, align 8
   %arrayidx1 = getelementptr inbounds i8, i8* %da, i8 1
-  store i8 1, i8* %arrayidx1, align 8
+  store i8 0, i8* %arrayidx1, align 8
   %arrayidx2 = getelementptr inbounds i8, i8* %da, i8 2
   ret void
 }