[llvm] 0457f50 - [RISCV] Implement storeOfVectorConstantIsCheap hook to prevent store merging at VL=2

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed May 17 11:14:08 PDT 2023


Author: Philip Reames
Date: 2023-05-17T11:13:57-07:00
New Revision: 0457f506fddf47cfe842b398c7f522057cef8163

URL: https://github.com/llvm/llvm-project/commit/0457f506fddf47cfe842b398c7f522057cef8163
DIFF: https://github.com/llvm/llvm-project/commit/0457f506fddf47cfe842b398c7f522057cef8163.diff

LOG: [RISCV] Implement storeOfVectorConstantIsCheap hook to prevent store merging at VL=2

In general, VL=2 vectors are of very questionable profitability. For constants specifically, our inability to materialize many vector constants cheaply biases us strongly towards unprofitability at VL=2.

This hook is very close to the X86 implementation. The difference is that X86 whitelists stores of zeros, whereas we're better off letting those stores stay scalar at VL=2.

Differential Revision: https://reviews.llvm.org/D150798

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/test/CodeGen/RISCV/rvv/combine-store.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3936c51884cb..6bf3a811b266 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -531,6 +531,13 @@ class RISCVTargetLowering : public TargetLowering {
     return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
   }
 
+  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
+                                    unsigned AddrSpace) const override {
+    // If we can replace 4 or more scalar stores, there will be a reduction
+    // in instructions even after we add a vector constant load.
+    return NumElem >= 4;
+  }
+
   bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
     return VT.isScalarInteger();
   }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/combine-store.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
index 9640d7591a9b..c7187148f571 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
@@ -31,9 +31,8 @@ define void @combine_zero_stores_4xi8(ptr %p) {
 define void @combine_zero_stores_8xi8(ptr %p) {
 ; RV32-LABEL: combine_zero_stores_8xi8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vse32.v v8, (a0)
+; RV32-NEXT:    sw zero, 0(a0)
+; RV32-NEXT:    sw zero, 4(a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: combine_zero_stores_8xi8:
@@ -72,9 +71,8 @@ define void @combine_zero_stores_2xi16(ptr %p) {
 define void @combine_zero_stores_4xi16(ptr %p) {
 ; RV32-LABEL: combine_zero_stores_4xi16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vse32.v v8, (a0)
+; RV32-NEXT:    sw zero, 0(a0)
+; RV32-NEXT:    sw zero, 4(a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: combine_zero_stores_4xi16:
@@ -104,9 +102,8 @@ define void @combine_zero_stores_8xi16(ptr %p) {
 ;
 ; RV64-LABEL: combine_zero_stores_8xi16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    sd zero, 0(a0)
+; RV64-NEXT:    sd zero, 8(a0)
 ; RV64-NEXT:    ret
   store i16 zeroinitializer, ptr %p, align 16
   %gep1 = getelementptr i16, ptr %p, i64 1
@@ -129,9 +126,8 @@ define void @combine_zero_stores_8xi16(ptr %p) {
 define void @combine_zero_stores_2xi32(ptr %p) {
 ; RV32-LABEL: combine_zero_stores_2xi32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vse32.v v8, (a0)
+; RV32-NEXT:    sw zero, 0(a0)
+; RV32-NEXT:    sw zero, 4(a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: combine_zero_stores_2xi32:
@@ -154,9 +150,8 @@ define void @combine_zero_stores_4xi32(ptr %p) {
 ;
 ; RV64-LABEL: combine_zero_stores_4xi32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    sd zero, 0(a0)
+; RV64-NEXT:    sd zero, 8(a0)
 ; RV64-NEXT:    ret
   store i32 zeroinitializer, ptr %p, align 16
   %gep1 = getelementptr i32, ptr %p, i64 1
@@ -201,18 +196,11 @@ define void @combine_zero_stores_8xi32(ptr %p) {
 }
 
 define void @combine_zero_stores_2xi32_unaligned(ptr %p) {
-; RV32-LABEL: combine_zero_stores_2xi32_unaligned:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: combine_zero_stores_2xi32_unaligned:
-; RV64:       # %bb.0:
-; RV64-NEXT:    sw zero, 0(a0)
-; RV64-NEXT:    sw zero, 4(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: combine_zero_stores_2xi32_unaligned:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sw zero, 0(a0)
+; CHECK-NEXT:    sw zero, 4(a0)
+; CHECK-NEXT:    ret
   store i32 zeroinitializer, ptr %p
   %gep = getelementptr i8, ptr %p, i64 4
   store i32 zeroinitializer, ptr %gep
@@ -230,9 +218,8 @@ define void @combine_zero_stores_2xi64(ptr %p) {
 ;
 ; RV64-LABEL: combine_zero_stores_2xi64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    sd zero, 0(a0)
+; RV64-NEXT:    sd zero, 8(a0)
 ; RV64-NEXT:    ret
   store i64 zeroinitializer, ptr %p
   %gep = getelementptr i8, ptr %p, i64 8
@@ -243,10 +230,8 @@ define void @combine_zero_stores_2xi64(ptr %p) {
 define void @combine_fp_zero_stores_crash(ptr %ptr)  {
 ; CHECK-LABEL: combine_fp_zero_stores_crash:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    sw zero, 4(a0)
+; CHECK-NEXT:    sw zero, 8(a0)
 ; CHECK-NEXT:    ret
   %addr1 = getelementptr float, ptr %ptr, i64 1
   %addr2 = getelementptr float, ptr %ptr, i64 2


        


More information about the llvm-commits mailing list