[llvm] c55a080 - [RISCV] Add shuffle coverage for compress, decompress, and repeat idioms
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 26 09:28:15 PST 2024
Author: Philip Reames
Date: 2024-11-26T09:27:56-08:00
New Revision: c55a080c080ed76a9aabe6dcd1966fedc0ecda5a
URL: https://github.com/llvm/llvm-project/commit/c55a080c080ed76a9aabe6dcd1966fedc0ecda5a
DIFF: https://github.com/llvm/llvm-project/commit/c55a080c080ed76a9aabe6dcd1966fedc0ecda5a.diff
LOG: [RISCV] Add shuffle coverage for compress, decompress, and repeat idioms
compress is intended to match vcompress from the ISA manual. Note that
deinterleave is a subset of this, and is already tested elsewhere.
decompress is the synthetic pattern defined in the same manual - though we
can often do better than the mentioned iota/vrgather. Note that some of these
can also be expressed as an interleave with at least one undef source,
and are already tested elsewhere.
repeat repeats each input element N times in the output. It can be
described as an interleave operation, but we can sometimes do
better lowering-wise.
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index e46587f58b4eb6..dbfe7bb51dbffa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -744,3 +744,200 @@ define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
%s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
ret <8 x i8> %s
}
+
+define <8 x i8> @shuffle_compress_singlesrc_e8(<8 x i8> %v) {
+; CHECK-LABEL: shuffle_compress_singlesrc_e8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI49_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0)
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
+ ret <8 x i8> %out
+}
+
+define <8 x i16> @shuffle_compress_singlesrc_e16(<8 x i16> %v) {
+; CHECK-LABEL: shuffle_compress_singlesrc_e16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI50_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
+ ret <8 x i16> %out
+}
+
+define <8 x i32> @shuffle_compress_singlesrc_e32(<8 x i32> %v) {
+; CHECK-LABEL: shuffle_compress_singlesrc_e32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI51_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0)
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef>
+ ret <8 x i32> %out
+}
+
+define <8 x i64> @shuffle_compress_singlesrc_e64(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_compress_singlesrc_e64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI52_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0)
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
+ ret <8 x i64> %out
+}
+
+define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) {
+; CHECK-LABEL: shuffle_compress_singlesrc_gaps_e32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI53_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0)
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @shuffle_decompress2_singlesrc_e32(<8 x i32> %v) {
+; CHECK-LABEL: shuffle_decompress2_singlesrc_e32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v8
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v10, a0, v8
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @shuffle_decompress3_singlesrc_e32(<8 x i32> %v) {
+; RV32-LABEL: shuffle_decompress3_singlesrc_e32:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI55_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0)
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vle16.v v12, (a0)
+; RV32-NEXT: vrgatherei16.vv v10, v8, v12
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: shuffle_decompress3_singlesrc_e32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, 32769
+; RV64-NEXT: slli a0, a0, 21
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vrgatherei16.vv v10, v8, v12
+; RV64-NEXT: vmv.v.v v8, v10
+; RV64-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 2, i32 undef>
+ ret <8 x i32> %out
+}
+
+; TODO: This should be a single vslideup.vi
+define <8 x i32> @shuffle_decompress4_singlesrc_e32(<8 x i32> %v) {
+; CHECK-LABEL: shuffle_decompress4_singlesrc_e32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vsrl.vi v12, v10, 2
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
+ ret <8 x i32> %out
+}
+
+; TODO: This should be either a single vslideup.vi or two widening interleaves.
+define <8 x i8> @shuffle_decompress4_singlesrc_e8(<8 x i8> %v) {
+; CHECK-LABEL: shuffle_decompress4_singlesrc_e8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vsrl.vi v10, v9, 2
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
+ ret <8 x i8> %out
+}
+
+define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) {
+; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0)
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 undef, i32 4>
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @shuffle_repeat2_singlesrc_e32(<8 x i32> %v) {
+; CHECK-LABEL: shuffle_repeat2_singlesrc_e32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v8
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v10, a0, v8
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @shuffle_repeat3_singlesrc_e32(<8 x i32> %v) {
+; CHECK-LABEL: shuffle_repeat3_singlesrc_e32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 7
+; CHECK-NEXT: vmv.v.i v11, 1
+; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vmerge.vim v11, v11, 0, v0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vim v12, v11, 2, v0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2>
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @shuffle_repeat4_singlesrc_e32(<8 x i32> %v) {
+; CHECK-LABEL: shuffle_repeat4_singlesrc_e32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vsrl.vi v12, v10, 2
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %out
+}
More information about the llvm-commits
mailing list