[PATCH] D137699: [RISCV] Don't use zero-stride vector load if there's no optimized u-arch
Wang Pengcheng via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 10 23:19:58 PST 2022
pcwang-thead updated this revision to Diff 474666.
pcwang-thead added a comment.
Rename `FeatureNoOptimizedZeroStrideLoad` to `TuneNoOptimizedZeroStrideLoad`
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D137699/new/
https://reviews.llvm.org/D137699
Files:
llvm/lib/Target/RISCV/RISCV.td
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVSubtarget.h
llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
Index: llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
+++ llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+experimental-zvfh,+v -target-abi ilp32d -verify-machineinstrs < %s \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefixes=CHECK,OPTIMIZED
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+experimental-zvfh,+v -target-abi lp64d -verify-machineinstrs < %s \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefixes=CHECK,OPTIMIZED
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+experimental-zvfh,+v,+no-optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+experimental-zvfh,+v,+no-optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED
define <vscale x 8 x half> @vsplat_nxv8f16(half %f) {
; CHECK-LABEL: vsplat_nxv8f16:
@@ -72,11 +76,18 @@
; Test that we fold this to a vlse with 0 stride.
define <vscale x 8 x float> @vsplat_load_nxv8f32(float* %ptr) {
-; CHECK-LABEL: vsplat_load_nxv8f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vlse32.v v8, (a0), zero
-; CHECK-NEXT: ret
+; OPTIMIZED-LABEL: vsplat_load_nxv8f32:
+; OPTIMIZED: # %bb.0:
+; OPTIMIZED-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; OPTIMIZED-NEXT: vlse32.v v8, (a0), zero
+; OPTIMIZED-NEXT: ret
+;
+; NOT-OPTIMIZED-LABEL: vsplat_load_nxv8f32:
+; NOT-OPTIMIZED: # %bb.0:
+; NOT-OPTIMIZED-NEXT: flw ft0, 0(a0)
+; NOT-OPTIMIZED-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; NOT-OPTIMIZED-NEXT: vfmv.v.f v8, ft0
+; NOT-OPTIMIZED-NEXT: ret
%f = load float, float* %ptr
%head = insertelement <vscale x 8 x float> poison, float %f, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
Index: llvm/lib/Target/RISCV/RISCVSubtarget.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -101,6 +101,7 @@
bool HasShortForwardBranchOpt = false;
bool HasLUIADDIFusion = false;
bool HasForcedAtomics = false;
+ bool HasOptimizedZeroStrideLoad = true;
unsigned XLen = 32;
unsigned ZvlLen = 0;
MVT XLenVT = MVT::i32;
@@ -199,6 +200,7 @@
bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
bool hasForcedAtomics() const { return HasForcedAtomics; }
+ bool hasOptimizedZeroStrideLoad() const { return HasOptimizedZeroStrideLoad; }
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
unsigned getFLen() const {
Index: llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1790,6 +1790,10 @@
case RISCVISD::VFMV_S_F_VL:
case RISCVISD::VMV_V_X_VL:
case RISCVISD::VFMV_V_F_VL: {
+    // Only do this if the subtarget has an optimized zero-stride vector load.
+ if (!Subtarget->hasOptimizedZeroStrideLoad())
+ break;
+
// Try to match splat of a scalar load to a strided load with stride of x0.
bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
Index: llvm/lib/Target/RISCV/RISCV.td
===================================================================
--- llvm/lib/Target/RISCV/RISCV.td
+++ llvm/lib/Target/RISCV/RISCV.td
@@ -452,6 +452,11 @@
"true", "Has reasonably performant unaligned scalar "
"loads and stores">;
+def TuneNoOptimizedZeroStrideLoad
+ : SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
"false", "Hasn't optimized (perform fewer memory operations) "
"zero-stride vector load">;
+
def TuneLUIADDIFusion
: SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
"true", "Enable LUI+ADDI macrofusion">;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D137699.474666.patch
Type: text/x-patch
Size: 4477 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221111/694f176c/attachment.bin>
More information about the llvm-commits
mailing list