[llvm] [RISCV] Use a tail agnostic vslideup if possible for scalable insert_subvector (PR #83146)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 27 07:46:45 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
If we know that an insert_subvector of a fixed subvector will overwrite the entire tail of the vector, we use a tail-agnostic vslideup. This was added in https://reviews.llvm.org/D147347, but we can do the same thing for scalable vectors too.
The `Policy` variable is defined in a slightly odd place, but this is to mirror the fixed-length subvector code path as closely as possible. I think we may be able to deduplicate the two paths in the future.
---
Full diff: https://github.com/llvm/llvm-project/pull/83146.diff
10 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+8-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll (+7-7)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll (+3-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0c98642748d4ec..6fdd4161383d92 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9729,8 +9729,15 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
+ ElementCount EndIndex =
+ ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
VL = computeVLMax(SubVecVT, DL, DAG);
+ // Use tail agnostic policy if we're inserting over Vec's tail.
+ unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+ if (EndIndex == VecVT.getVectorElementCount())
+ Policy = RISCVII::TAIL_AGNOSTIC;
+
// If we're inserting into the lowest elements, use a tail undisturbed
// vmv.v.v.
if (RemIdx == 0) {
@@ -9744,7 +9751,7 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
- SlideupAmt, Mask, VL);
+ SlideupAmt, Mask, VL, Policy);
}
// If required, insert this subvector back into the correct vector register.
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index 0f3f57a0dec59a..d377082761736f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -76,7 +76,7 @@ define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale
; CHECK-NEXT: slli a1, a0, 1
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 243dc19a25588d..1ef63ffa9ee0c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -2235,9 +2235,9 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT: vmfeq.vv v16, v8, v24, v0.t
; ZVFH-NEXT: add a0, a1, a1
-; ZVFH-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; ZVFH-NEXT: vslideup.vx v16, v1, a1
-; ZVFH-NEXT: vmv1r.v v0, v16
+; ZVFH-NEXT: vmv.v.v v0, v16
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 4
; ZVFH-NEXT: add sp, sp, a0
@@ -2337,7 +2337,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a2, a5
; ZVFHMIN-NEXT: .LBB85_4:
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v2, v26, a3
; ZVFHMIN-NEXT: sub a5, a2, a4
; ZVFHMIN-NEXT: sltu a6, a2, a5
@@ -2395,12 +2395,12 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v1
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v8, v3, a3
; ZVFHMIN-NEXT: add a0, a1, a1
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v8, v2, a1
-; ZVFHMIN-NEXT: vmv1r.v v0, v8
+; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 34
; ZVFHMIN-NEXT: mul a0, a0, a1
@@ -3637,7 +3637,7 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc
; CHECK-NEXT: slli a0, a1, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v17, v16, a0
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
index e77966d8c43b37..aee255196ce2e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -3387,7 +3387,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: add a1, a0, a0
-; RV32-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
+; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; RV32-NEXT: vslideup.vx v0, v24, a0
; RV32-NEXT: ret
;
@@ -3400,7 +3400,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: srli a0, a0, 3
; RV64-NEXT: add a1, a0, a0
-; RV64-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
+; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; RV64-NEXT: vslideup.vx v0, v24, a0
; RV64-NEXT: ret
;
@@ -3413,7 +3413,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; ZVFHMIN32-NEXT: csrr a0, vlenb
; ZVFHMIN32-NEXT: srli a0, a0, 3
; ZVFHMIN32-NEXT: add a1, a0, a0
-; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
+; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0
; ZVFHMIN32-NEXT: ret
;
@@ -3426,7 +3426,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; ZVFHMIN64-NEXT: csrr a0, vlenb
; ZVFHMIN64-NEXT: srli a0, a0, 3
; ZVFHMIN64-NEXT: add a1, a0, a0
-; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
+; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0
; ZVFHMIN64-NEXT: ret
%vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index 007afe12b8e438..a23b7c7b6ae9e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -2424,7 +2424,7 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v1, a1
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
@@ -2459,7 +2459,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b,
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-NEXT: add a0, a2, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v25, a2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2492,7 +2492,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-NEXT: add a0, a2, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v25, a2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
index a2ac684604b944..5f35a4e50a9523 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
@@ -3235,7 +3235,7 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) {
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-NEXT: vmseq.vi v24, v16, 0
; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vx v0, v24, a0
; CHECK-NEXT: ret
%vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index c23c10205e6e36..4c64b1677b3626 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -22,7 +22,7 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vsm.v v9, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index e84fd1b1a7036a..1acc0fec8fe586 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -24,7 +24,7 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v0, v8, a0
; CHECK-NEXT: ret
;
@@ -44,7 +44,7 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: add a1, a0, a0
-; ZVBB-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; ZVBB-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v0, v8, a0
; ZVBB-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -376,9 +376,9 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a0
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv4f16_nxv2f16:
@@ -391,9 +391,9 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslidedown.vx v8, v10, a0
; ZVBB-NEXT: add a1, a0, a0
-; ZVBB-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a0
-; ZVBB-NEXT: vmv1r.v v8, v10
+; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
%res = call <vscale x 4 x half> @llvm.experimental.vector.interleave2.nxv4f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
ret <vscale x 4 x half> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index 8e983f63428a6a..b888fde7d06836 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -937,7 +937,7 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v24
; ZVFHMIN-NEXT: vand.vi v8, v8, 1
; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
-; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0
; ZVFHMIN-NEXT: ret
%evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1>
@@ -967,7 +967,7 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v24
; ZVFHMIN-NEXT: vand.vi v8, v8, 1
; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
-; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0
; ZVFHMIN-NEXT: ret
%evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index 2546ec95a00793..515d77109af9f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -894,7 +894,7 @@ define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-NEXT: lui a2, 1048568
; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v9, a2
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
@@ -982,7 +982,7 @@ define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-NEXT: lui a2, 1048568
; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v9, a2
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
``````````
</details>
https://github.com/llvm/llvm-project/pull/83146
More information about the llvm-commits
mailing list