[llvm] 8680c28 - [RISCV] Remove mask from vrgatherei16 in lowerVECTOR_INTERLEAVE.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 20 09:36:48 PDT 2023
Author: Craig Topper
Date: 2023-06-20T09:36:38-07:00
New Revision: 8680c28add0b1725c34a52a2f8c509c892385c7d
URL: https://github.com/llvm/llvm-project/commit/8680c28add0b1725c34a52a2f8c509c892385c7d
DIFF: https://github.com/llvm/llvm-project/commit/8680c28add0b1725c34a52a2f8c509c892385c7d.diff
LOG: [RISCV] Remove mask from vrgatherei16 in lowerVECTOR_INTERLEAVE.
Unless I'm missing something, we need to update the whole vector,
not just the lanes where OddMask is true.
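
As a concrete illustration (taking n = 4 purely for the sake of
example), the lowering builds the gather index vector like this:

  i       : 0 1 2 3 4 5 6 7
  i >> 1  : 0 0 1 1 2 2 3 3
  OddMask : 0 1 0 1 0 1 0 1
  Idx     : 0 4 1 5 2 6 3 7   (i >> 1, plus n where OddMask is set)

  gather result: v[0] v[4] v[1] v[5] v[2] v[6] v[3] v[7]

OddMask is only needed for the vadd.vx that biases the odd indices.
The vrgatherei16 itself has to write every lane, so it must run under
an all-ones mask; with OddMask on the gather, the even lanes would be
taken from the undef passthru operand.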
Reviewed By: luke
Differential Revision: https://reviews.llvm.org/D153087
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 46e94e2c34b27..8b5c8f4657dac 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7798,8 +7798,9 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
// Then perform the interleave
// v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
+ SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
- Concat, Idx, DAG.getUNDEF(ConcatVT), OddMask, VL);
+ Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
}
// Extract the two halves from the interleaved result
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 3c3dedfe827d9..e8d1f9c760936 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -80,7 +80,7 @@ define <vscale x 4 x i64> @vector_interleave_nxv4i64_nxv2i64(<vscale x 2 x i64>
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%res = call <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -173,44 +173,32 @@ define <vscale x 16 x i64> @vector_interleave_nxv16i64_nxv8i64(<vscale x 8 x i64
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv8r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vsrl.vi v2, v24, 1
+; CHECK-NEXT: vsrl.vi v8, v24, 1
; CHECK-NEXT: vand.vi v24, v24, 1
-; CHECK-NEXT: vmsne.vi v0, v24, 0
+; CHECK-NEXT: vmsne.vi v10, v24, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vx v2, v2, a0, v0.t
-; CHECK-NEXT: vmv4r.v v12, v16
+; CHECK-NEXT: vmv8r.v v24, v0
+; CHECK-NEXT: vmv4r.v v12, v4
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vmv4r.v v28, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v24, v8, v2, v0.t
+; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v2, v0.t
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -310,7 +298,7 @@ define <vscale x 4 x double> @vector_interleave_nxv4f64_nxv2f64(<vscale x 2 x do
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%res = call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -363,44 +351,32 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv8r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vsrl.vi v2, v24, 1
+; CHECK-NEXT: vsrl.vi v8, v24, 1
; CHECK-NEXT: vand.vi v24, v24, 1
-; CHECK-NEXT: vmsne.vi v0, v24, 0
+; CHECK-NEXT: vmsne.vi v10, v24, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vx v2, v2, a0, v0.t
-; CHECK-NEXT: vmv4r.v v12, v16
+; CHECK-NEXT: vmv8r.v v24, v0
+; CHECK-NEXT: vmv4r.v v12, v4
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vmv4r.v v28, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v24, v8, v2, v0.t
+; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v2, v0.t
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
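
For anyone following the test churn above, a minimal scalar model of
the masked gather semantics may help. This is an illustrative sketch
only, not LLVM or RVV code; the merge-into-passthru behavior it shows
is the masked-off policy the VL node has with a passthru operand.

#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of vrgatherei16.vv with a mask and a passthru operand:
// lanes where the mask is clear keep the passthru value.
std::vector<uint64_t> gather(const std::vector<uint64_t> &Src,
                             const std::vector<uint16_t> &Idx,
                             const std::vector<uint64_t> &Passthru,
                             const std::vector<bool> &Mask) {
  std::vector<uint64_t> Result(Src.size());
  for (std::size_t I = 0; I < Src.size(); ++I)
    Result[I] = Mask[I] ? Src[Idx[I]] : Passthru[I];
  return Result;
}

// With Mask = OddMask and Passthru = undef, every even lane of the
// interleave result is garbage; with an all-ones mask every lane is
// Src[Idx[I]], which is what lowerVECTOR_INTERLEAVE needs.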