[llvm] [RISCV] Pack build_vectors into largest available element type (PR #97351)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 18:39:13 PDT 2024
================
@@ -2171,3 +2209,240 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
+
+define <8 x i8> @buildvec_v8i8_pack(ptr %p, i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6, i8 %e7, i8 %e8) {
+; RV32-LABEL: buildvec_v8i8_pack:
+; RV32: # %bb.0:
+; RV32-NEXT: lbu a0, 0(sp)
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT: vmv.v.x v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v9, v8, a4
+; RV32-NEXT: vmv.v.x v8, a5
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vmv.v.i v0, 15
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v8i8_pack:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: lbu a0, 0(sp)
+; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64V-ONLY-NEXT: vmv.v.x v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a4
+; RV64V-ONLY-NEXT: vmv.v.x v8, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vmv.v.i v0, 15
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v8i8_pack:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: andi a0, a5, 255
+; RVA22U64-NEXT: slli t0, a0, 32
+; RVA22U64-NEXT: lbu a5, 0(sp)
+; RVA22U64-NEXT: andi a0, a6, 255
+; RVA22U64-NEXT: slli a0, a0, 40
+; RVA22U64-NEXT: or a6, a0, t0
+; RVA22U64-NEXT: slli a5, a5, 56
+; RVA22U64-NEXT: andi a0, a7, 255
+; RVA22U64-NEXT: slli a0, a0, 48
+; RVA22U64-NEXT: or a0, a0, a6
+; RVA22U64-NEXT: or a0, a0, a5
+; RVA22U64-NEXT: andi a3, a3, 255
+; RVA22U64-NEXT: slli a3, a3, 16
+; RVA22U64-NEXT: andi a4, a4, 255
+; RVA22U64-NEXT: slli a4, a4, 24
+; RVA22U64-NEXT: or a3, a3, a4
+; RVA22U64-NEXT: andi a1, a1, 255
+; RVA22U64-NEXT: andi a2, a2, 255
+; RVA22U64-NEXT: slli a2, a2, 8
+; RVA22U64-NEXT: or a1, a1, a2
+; RVA22U64-NEXT: or a1, a1, a3
+; RVA22U64-NEXT: or a0, a0, a1
+; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RVA22U64-NEXT: vmv.s.x v8, a0
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v8i8_pack:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: lbu a0, 0(sp)
+; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64ZVE32-NEXT: vmv.v.x v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a4
+; RV64ZVE32-NEXT: vmv.v.x v8, a5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vmv.v.i v0, 15
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4, v0.t
+; RV64ZVE32-NEXT: ret
+ %v1 = insertelement <8 x i8> poison, i8 %e1, i32 0
+ %v2 = insertelement <8 x i8> %v1, i8 %e2, i32 1
+ %v3 = insertelement <8 x i8> %v2, i8 %e3, i32 2
+ %v4 = insertelement <8 x i8> %v3, i8 %e4, i32 3
+ %v5 = insertelement <8 x i8> %v4, i8 %e5, i32 4
+ %v6 = insertelement <8 x i8> %v5, i8 %e6, i32 5
+ %v7 = insertelement <8 x i8> %v6, i8 %e7, i32 6
+ %v8 = insertelement <8 x i8> %v7, i8 %e8, i32 7
+ ret <8 x i8> %v8
+}
+
+define <6 x i8> @buildvec_v6i8_pack(ptr %p, i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6) {
+; RV32-LABEL: buildvec_v6i8_pack:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v6i8_pack:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-ONLY-NEXT: vmv.v.x v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v6i8_pack:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: andi a0, a3, 255
+; RVA22U64-NEXT: slli a0, a0, 16
+; RVA22U64-NEXT: andi a3, a4, 255
+; RVA22U64-NEXT: slli a3, a3, 24
+; RVA22U64-NEXT: or a0, a0, a3
+; RVA22U64-NEXT: andi a1, a1, 255
+; RVA22U64-NEXT: andi a2, a2, 255
+; RVA22U64-NEXT: slli a2, a2, 8
+; RVA22U64-NEXT: or a1, a1, a2
+; RVA22U64-NEXT: or a0, a0, a1
+; RVA22U64-NEXT: andi a1, a5, 255
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: andi a2, a6, 255
+; RVA22U64-NEXT: slli a2, a2, 40
+; RVA22U64-NEXT: or a1, a1, a2
+; RVA22U64-NEXT: or a0, a0, a1
+; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RVA22U64-NEXT: vmv.s.x v8, a0
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v6i8_pack:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32-NEXT: vmv.v.x v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32-NEXT: ret
+ %v1 = insertelement <6 x i8> poison, i8 %e1, i32 0
+ %v2 = insertelement <6 x i8> %v1, i8 %e2, i32 1
+ %v3 = insertelement <6 x i8> %v2, i8 %e3, i32 2
+ %v4 = insertelement <6 x i8> %v3, i8 %e4, i32 3
+ %v5 = insertelement <6 x i8> %v4, i8 %e5, i32 4
+ %v6 = insertelement <6 x i8> %v5, i8 %e6, i32 5
+ ret <6 x i8> %v6
+}
+
+define <4 x i16> @buildvec_v4i16_pack(ptr %p, i16 %e1, i16 %e2, i16 %e3, i16 %e4) {
+; RV32-LABEL: buildvec_v4i16_pack:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v4i16_pack:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-ONLY-NEXT: vmv.v.x v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v4i16_pack:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: slli a4, a4, 48
+; RVA22U64-NEXT: zext.h a0, a3
+; RVA22U64-NEXT: slli a0, a0, 32
+; RVA22U64-NEXT: or a0, a0, a4
+; RVA22U64-NEXT: zext.h a1, a1
+; RVA22U64-NEXT: zext.h a2, a2
+; RVA22U64-NEXT: slli a2, a2, 16
+; RVA22U64-NEXT: or a1, a1, a2
+; RVA22U64-NEXT: or a0, a0, a1
+; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RVA22U64-NEXT: vmv.s.x v8, a0
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v4i16_pack:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64ZVE32-NEXT: vmv.v.x v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: ret
+ %v1 = insertelement <4 x i16> poison, i16 %e1, i32 0
+ %v2 = insertelement <4 x i16> %v1, i16 %e2, i32 1
+ %v3 = insertelement <4 x i16> %v2, i16 %e3, i32 2
+ %v4 = insertelement <4 x i16> %v3, i16 %e4, i32 3
+ ret <4 x i16> %v4
+}
+
+define <2 x i32> @buildvec_v2i32_pack(ptr %p, i32 %e1, i32 %e2) {
----------------
lukel97 wrote:
ptr %p looks to be unused in these tests
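
For illustration, a minimal sketch of what the first test could look like with the unused pointer argument dropped (hypothetical signature, not taken from the patch; the CHECK lines would need regenerating with update_llc_test_checks.py):

define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6, i8 %e7, i8 %e8) {
  ; Same insertelement chain as in the patch, only without the dead ptr %p parameter.
  %v1 = insertelement <8 x i8> poison, i8 %e1, i32 0
  %v2 = insertelement <8 x i8> %v1, i8 %e2, i32 1
  %v3 = insertelement <8 x i8> %v2, i8 %e3, i32 2
  %v4 = insertelement <8 x i8> %v3, i8 %e4, i32 3
  %v5 = insertelement <8 x i8> %v4, i8 %e5, i32 4
  %v6 = insertelement <8 x i8> %v5, i8 %e6, i32 5
  %v7 = insertelement <8 x i8> %v6, i8 %e7, i32 6
  %v8 = insertelement <8 x i8> %v7, i8 %e8, i32 7
  ret <8 x i8> %v8
}

Note that dropping the pointer would shift the element operands down one argument register (a0..a7), so the eighth i8 element would presumably no longer need to be loaded from the stack in the v8i8 test, and the expected output would change accordingly.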
https://github.com/llvm/llvm-project/pull/97351