[llvm] [RISCV] Always expand zero strided vp.strided.load (PR #98901)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 06:43:15 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
This patch makes zero-strided VP loads always expand to a scalar load and splat, even if +optimized-zero-stride-load is present.
Expanding them allows more .vx splat patterns to be matched, which is needed to prevent regressions in #98111.
If the feature is present, RISCVDAGToDAGISel::Select will combine it back into a zero-strided load.
The RV32 test diff also shows how we need to emit a zero-strided load either way after expanding an SEW=64 strided load, since the i64 element has to go through a stack slot and be splatted back with a zero-strided vlse64. We could maybe fix this in a later patch by not doing the expand if SEW > XLEN.
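
For illustration, this is roughly the kind of rewrite the expand performs on an unmasked (all-ones mask) stride-0 load. This is a minimal IR sketch with made-up function names and a fixed <4 x i64> type; the actual output of RISCVCodeGenPrepare may carry the EVL through differently (e.g. via a VL-aware splat), so treat it as a sketch rather than the exact IR.

```llvm
declare <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr, i32, <4 x i1>, i32)

; Before: a vp.strided.load with stride 0 and an all-ones mask.
define <4 x i64> @before(ptr %ptr, i32 %evl) {
  %v = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

; After: a scalar load plus a splat. The splat lets SelectionDAG match .vx
; patterns on the users, or fold back into a zero-strided vlse when that is
; profitable on the subtarget.
define <4 x i64> @after(ptr %ptr, i32 %evl) {
  %s = load i64, ptr %ptr
  %head = insertelement <4 x i64> poison, i64 %s, i64 0
  %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer
  ret <4 x i64> %splat
}
```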
---
Full diff: https://github.com/llvm/llvm-project/pull/98901.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp (+3-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll (+29-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll (+28)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 35c46157c2eb9..b3f3dc6e2256c 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -163,10 +163,10 @@ bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
return true;
}
+// Always expand zero strided loads so we match more .vx splat patterns, even if
+// we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
+// it back to a strided load if it's optimized.
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
- if (ST->hasOptimizedZeroStrideLoad())
- return false;
-
Value *BasePtr, *VL;
using namespace PatternMatch;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 41c7d1f5fd64c..95f853b77f18b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -638,7 +638,7 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
+; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
@@ -657,7 +657,7 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
@@ -670,3 +670,30 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
%load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
ret <4 x half> %load
}
+
+define <4 x i64> @zero_strided_vadd.vx(<4 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: addi sp, sp, -16
+; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT: lw a1, 4(a0)
+; CHECK-RV32-NEXT: lw a0, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
+; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: addi a0, sp, 8
+; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v10
+; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: ld a0, 0(a0)
+; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-RV64-NEXT: vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT: ret
+ %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
+ %w = add <4 x i64> %v, %load
+ ret <4 x i64> %w
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 6b8ded4914226..563da270272c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -822,3 +822,31 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
%load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 4)
ret <vscale x 1 x half> %load
}
+
+define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: addi sp, sp, -16
+; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT: lw a1, 4(a0)
+; CHECK-RV32-NEXT: lw a0, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
+; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: addi a0, sp, 8
+; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v9
+; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: ld a0, 0(a0)
+; CHECK-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV64-NEXT: vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT: ret
+ %vscale = call i32 @llvm.vscale()
+ %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
+ %w = add <vscale x 1 x i64> %v, %load
+ ret <vscale x 1 x i64> %w
+}
``````````
https://github.com/llvm/llvm-project/pull/98901