[llvm] 58f525a - [RISCV] Add tests for deinterleave shuffles w/o vnsrl.vv

Fri Nov 1 08:02:15 PDT 2024

Author: Philip Reames
Date: 2024-11-01T08:02:04-07:00
New Revision: 58f525a23ca44f1acdb80ff803fb7b95acf8c7d3

URL: https://github.com/llvm/llvm-project/commit/58f525a23ca44f1acdb80ff803fb7b95acf8c7d3
DIFF: https://github.com/llvm/llvm-project/commit/58f525a23ca44f1acdb80ff803fb7b95acf8c7d3.diff

LOG: [RISCV] Add tests for deinterleave shuffles w/o vnsrl.vv

With SEW=64, the vnsrl trick we primarily rely on does not work.  This
is handled correctly today, but we have fairly minimal testing of the
resulting shuffles, which makes it hard to demonstrate the value of an
upcoming change.

Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
    llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 4fed94401f0acd..bc32518b671953 100644

--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -90,6 +90,64 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
 ret {<2 x i64>, <2 x i64>} %retval
 }
 
+define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) { ; Codegen test: deinterleave an <8 x i64> into two <4 x i64> results; the expected assembly below is built from vrgatherei16.vv gathers at e64 and contains no vnsrl.
+; CHECK-LABEL: vector_deinterleave_v4i64_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vadd.vv v14, v12, v12
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v14
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vadd.vi v15, v14, -4
+; CHECK-NEXT:    vmv.v.i v0, 12
+; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v16, v8, 4
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v12, v16, v15, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vadd.vi v15, v14, 1
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v15
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vadd.vi v8, v14, -3
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v10, v16, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %retval = call {<4 x i64>, <4 x i64>} @llvm.vector.deinterleave2.v8i64(<8 x i64> %vec) ; the intrinsic yields both <4 x i64> results as one aggregate
+  ret {<4 x i64>, <4 x i64>} %retval ; return the aggregate as-is; the function body does nothing else
+}
+
+define {<8 x i64>, <8 x i64>} @vector_deinterleave_v8i64_v16i64(<16 x i64> %vec) { ; Codegen test: deinterleave a <16 x i64> into two <8 x i64> results; the expected assembly below uses vrgatherei16.vv gathers at e64/m4 and contains no vnsrl.
+; CHECK-LABEL: vector_deinterleave_v8i64_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv8r.v v16, v8
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vadd.vv v7, v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v8, v16, v7
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, -16
+; CHECK-NEXT:    vadd.vi v12, v7, -8
+; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
+; CHECK-NEXT:    vslidedown.vi v24, v16, 8
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v8, v24, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vadd.vi v20, v7, 1
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v12, v16, v20
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vadd.vi v16, v7, -7
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v12, v24, v16, v0.t
+; CHECK-NEXT:    ret
+  %retval = call {<8 x i64>, <8 x i64>} @llvm.vector.deinterleave2.v16i64(<16 x i64> %vec) ; the intrinsic yields both <8 x i64> results as one aggregate
+  ret {<8 x i64>, <8 x i64>} %retval ; return the aggregate as-is; the function body does nothing else
+}
+
 declare {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1>)
 declare {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8>)
 declare {<8 x i16>, <8 x i16>} @llvm.vector.deinterleave2.v16i16(<16 x i16>)
@@ -176,9 +234,41 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double
 ret {<2 x double>, <2 x double>} %retval
 }
 
+define {<4 x double>, <4 x double>} @vector_deinterleave_v4f64_v8f64(<8 x double> %vec) { ; Codegen test: deinterleave an <8 x double> into two <4 x double> results; the expected assembly below is built from vrgatherei16.vv gathers at e64 and contains no vnsrl.
+; CHECK-LABEL: vector_deinterleave_v4f64_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vadd.vv v14, v12, v12
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v14
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vadd.vi v15, v14, -4
+; CHECK-NEXT:    vmv.v.i v0, 12
+; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v16, v8, 4
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v12, v16, v15, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vadd.vi v15, v14, 1
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v15
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vadd.vi v8, v14, -3
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v10, v16, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+%retval = call {<4 x double>, <4 x double>} @llvm.vector.deinterleave2.v8f64(<8 x double> %vec) ; the intrinsic yields both <4 x double> results as one aggregate
+ret {<4 x double>, <4 x double>} %retval ; return the aggregate as-is; the function body does nothing else
+}
+
 declare {<2 x half>,<2 x half>} @llvm.vector.deinterleave2.v4f16(<4 x half>)
 declare {<4 x half>, <4 x half>} @llvm.vector.deinterleave2.v8f16(<8 x half>)
 declare {<2 x float>, <2 x float>} @llvm.vector.deinterleave2.v4f32(<4 x float>)
 declare {<8 x half>, <8 x half>} @llvm.vector.deinterleave2.v16f16(<16 x half>)
 declare {<4 x float>, <4 x float>} @llvm.vector.deinterleave2.v8f32(<8 x float>)
 declare {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 14fe477f537cf6..499eee1819a4ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -84,6 +84,22 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
 ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
 }
 
+define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv8i64(<vscale x 8 x i64> %vec) { ; Codegen test: scalable-vector deinterleave of <vscale x 8 x i64> into two <vscale x 4 x i64> results; the expected assembly below uses two vrgather.vv gathers at e64/m8 with indices vid+vid and vid+vid+1.
+; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vadd.vv v24, v16, v16
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vadd.vi v24, v24, 1
+; CHECK-NEXT:    vrgather.vv v0, v8, v24
+; CHECK-NEXT:    vmv4r.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v0
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec) ; the intrinsic yields both <vscale x 4 x i64> results as one aggregate
+ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval ; return the aggregate as-is; the function body does nothing else
+}
+
 declare {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1>)
 declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
 declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)