[llvm] a144f58 - [RISCV] Expand test coverage for ri.vunzip2{a,b}
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 12:49:55 PDT 2025

Author: Philip Reames
Date: 2025-06-03T12:49:49-07:00
New Revision: a144f58a7932a66139f6c570d353c0248d9073d4
URL: https://github.com/llvm/llvm-project/commit/a144f58a7932a66139f6c570d353c0248d9073d4
DIFF: https://github.com/llvm/llvm-project/commit/a144f58a7932a66139f6c570d353c0248d9073d4.diff
LOG: [RISCV] Expand test coverage for ri.vunzip2{a,b}

Cover cases that upcoming optimization changes will improve.
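For context, the ZIP run configuration enables the vendor ri.vunzip2a/ri.vunzip2b instructions; as the shuffle masks below suggest, ri.vunzip2a corresponds to the even-index elements of the concatenated source operands and ri.vunzip2b to the odd-index ones. A minimal sketch of the deinterleave pattern these tests exercise (the function name here is illustrative only, not part of the commit):

    ; Even-element deinterleave of two <4 x i64> sources into one result;
    ; the mask <0,2,4,6> selects elements 0,2 of %a and elements 0,2 of %b.
    define <4 x i64> @deinterleave_even(<4 x i64> %a, <4 x i64> %b) {
      %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
      ret <4 x i64> %c
    }

As the checks below show, the current ZIP lowering uses ri.vunzip2a.vv for only part of such shuffles and still falls back to slides, vrgather, or vmerge for the remainder; those are the cases the upcoming changes are expected to improve.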
Added:

Modified:
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll

Removed:

################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
index b692a80159288..9c884454aa025 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
@@ -1302,3 +1302,477 @@ entry:
store <2 x double> %shuffle.i5, ptr %out, align 8
ret void
}
+
+define <2 x i64> @unzip2a_dual_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; V-LABEL: unzip2a_dual_v2i64:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; V-NEXT: vslideup.vi v8, v9, 1
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: unzip2a_dual_v2i64:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: mv a1, a2
+; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: unzip2a_dual_v2i64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v9, 1
+; ZIP-NEXT: ret
+entry:
+ %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %c
+}
+
+define <4 x i64> @unzip2a_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; V-LABEL: unzip2a_dual_v4i64:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; V-NEXT: vmv.v.i v0, 8
+; V-NEXT: vslideup.vi v10, v9, 2
+; V-NEXT: vslideup.vi v10, v9, 1, v0.t
+; V-NEXT: vmv.v.i v0, 2
+; V-NEXT: vslidedown.vi v8, v8, 1, v0.t
+; V-NEXT: vmv.v.i v0, 12
+; V-NEXT: vmerge.vvm v8, v8, v10, v0
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: unzip2a_dual_v4i64:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: ld a3, 0(a2)
+; ZVE32F-NEXT: ld a2, 16(a2)
+; ZVE32F-NEXT: ld a4, 0(a1)
+; ZVE32F-NEXT: ld a1, 16(a1)
+; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
+; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: srli a5, a2, 32
+; ZVE32F-NEXT: srli a6, a3, 32
+; ZVE32F-NEXT: srli a7, a1, 32
+; ZVE32F-NEXT: srli t0, a4, 32
+; ZVE32F-NEXT: vmv.v.x v8, a4
+; ZVE32F-NEXT: vmv.v.x v9, a3
+; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a6
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT: vse32.v v9, (a0)
+; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: unzip2a_dual_v4i64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; ZIP-NEXT: vmv.v.i v0, 8
+; ZIP-NEXT: vslideup.vi v10, v9, 2
+; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
+; ZIP-NEXT: vmv.v.i v0, 12
+; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9
+; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT: ret
+entry:
+ %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i64> %c
+}
+
+define <16 x i64> @unzip2a_dual_v16i64(<16 x i64> %a, <16 x i64> %b) {
+; V-LABEL: unzip2a_dual_v16i64:
+; V: # %bb.0: # %entry
+; V-NEXT: lui a0, 5
+; V-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; V-NEXT: vid.v v16
+; V-NEXT: addi a0, a0, 1365
+; V-NEXT: vmv.s.x v20, a0
+; V-NEXT: li a0, -256
+; V-NEXT: vadd.vv v21, v16, v16
+; V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; V-NEXT: vcompress.vm v16, v8, v20
+; V-NEXT: vmv.s.x v0, a0
+; V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; V-NEXT: vadd.vi v8, v21, -16
+; V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; V-NEXT: vrgatherei16.vv v16, v12, v8, v0.t
+; V-NEXT: vmv.v.v v8, v16
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: unzip2a_dual_v16i64:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: addi sp, sp, -256
+; ZVE32F-NEXT: .cfi_def_cfa_offset 256
+; ZVE32F-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
+; ZVE32F-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
+; ZVE32F-NEXT: sd s2, 232(sp) # 8-byte Folded Spill
+; ZVE32F-NEXT: sd s3, 224(sp) # 8-byte Folded Spill
+; ZVE32F-NEXT: sd s4, 216(sp) # 8-byte Folded Spill
+; ZVE32F-NEXT: .cfi_offset ra, -8
+; ZVE32F-NEXT: .cfi_offset s0, -16
+; ZVE32F-NEXT: .cfi_offset s2, -24
+; ZVE32F-NEXT: .cfi_offset s3, -32
+; ZVE32F-NEXT: .cfi_offset s4, -40
+; ZVE32F-NEXT: addi s0, sp, 256
+; ZVE32F-NEXT: .cfi_def_cfa s0, 0
+; ZVE32F-NEXT: andi sp, sp, -128
+; ZVE32F-NEXT: ld t5, 0(a1)
+; ZVE32F-NEXT: ld t2, 16(a1)
+; ZVE32F-NEXT: ld a4, 32(a1)
+; ZVE32F-NEXT: ld a3, 48(a1)
+; ZVE32F-NEXT: ld a6, 64(a1)
+; ZVE32F-NEXT: ld a5, 80(a1)
+; ZVE32F-NEXT: ld a7, 96(a1)
+; ZVE32F-NEXT: ld a1, 112(a1)
+; ZVE32F-NEXT: ld t1, 0(a2)
+; ZVE32F-NEXT: ld t0, 16(a2)
+; ZVE32F-NEXT: ld t4, 32(a2)
+; ZVE32F-NEXT: ld t3, 48(a2)
+; ZVE32F-NEXT: ld t6, 64(a2)
+; ZVE32F-NEXT: ld s2, 80(a2)
+; ZVE32F-NEXT: ld s3, 96(a2)
+; ZVE32F-NEXT: ld a2, 112(a2)
+; ZVE32F-NEXT: srli s4, t5, 32
+; ZVE32F-NEXT: sw t5, 0(sp)
+; ZVE32F-NEXT: sw s4, 4(sp)
+; ZVE32F-NEXT: srli t5, t2, 32
+; ZVE32F-NEXT: sw t2, 8(sp)
+; ZVE32F-NEXT: srli t2, s3, 32
+; ZVE32F-NEXT: sw s3, 112(sp)
+; ZVE32F-NEXT: sw t2, 116(sp)
+; ZVE32F-NEXT: srli t2, a2, 32
+; ZVE32F-NEXT: sw a2, 120(sp)
+; ZVE32F-NEXT: sw t2, 124(sp)
+; ZVE32F-NEXT: srli a2, t6, 32
+; ZVE32F-NEXT: sw t6, 96(sp)
+; ZVE32F-NEXT: sw a2, 100(sp)
+; ZVE32F-NEXT: srli a2, s2, 32
+; ZVE32F-NEXT: sw s2, 104(sp)
+; ZVE32F-NEXT: sw a2, 108(sp)
+; ZVE32F-NEXT: srli a2, t4, 32
+; ZVE32F-NEXT: sw t4, 80(sp)
+; ZVE32F-NEXT: sw a2, 84(sp)
+; ZVE32F-NEXT: srli a2, t3, 32
+; ZVE32F-NEXT: sw t3, 88(sp)
+; ZVE32F-NEXT: sw a2, 92(sp)
+; ZVE32F-NEXT: srli a2, t1, 32
+; ZVE32F-NEXT: sw t1, 64(sp)
+; ZVE32F-NEXT: sw a2, 68(sp)
+; ZVE32F-NEXT: srli a2, t0, 32
+; ZVE32F-NEXT: sw t0, 72(sp)
+; ZVE32F-NEXT: sw a2, 76(sp)
+; ZVE32F-NEXT: srli a2, a7, 32
+; ZVE32F-NEXT: sw a7, 48(sp)
+; ZVE32F-NEXT: sw a2, 52(sp)
+; ZVE32F-NEXT: srli a2, a1, 32
+; ZVE32F-NEXT: sw a1, 56(sp)
+; ZVE32F-NEXT: sw a2, 60(sp)
+; ZVE32F-NEXT: srli a1, a6, 32
+; ZVE32F-NEXT: sw a6, 32(sp)
+; ZVE32F-NEXT: sw a1, 36(sp)
+; ZVE32F-NEXT: srli a1, a5, 32
+; ZVE32F-NEXT: sw a5, 40(sp)
+; ZVE32F-NEXT: sw a1, 44(sp)
+; ZVE32F-NEXT: srli a1, a4, 32
+; ZVE32F-NEXT: sw a4, 16(sp)
+; ZVE32F-NEXT: sw a1, 20(sp)
+; ZVE32F-NEXT: srli a1, a3, 32
+; ZVE32F-NEXT: sw a3, 24(sp)
+; ZVE32F-NEXT: sw a1, 28(sp)
+; ZVE32F-NEXT: li a1, 32
+; ZVE32F-NEXT: sw t5, 12(sp)
+; ZVE32F-NEXT: mv a2, sp
+; ZVE32F-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; ZVE32F-NEXT: vle32.v v8, (a2)
+; ZVE32F-NEXT: vse32.v v8, (a0)
+; ZVE32F-NEXT: addi sp, s0, -256
+; ZVE32F-NEXT: .cfi_def_cfa sp, 256
+; ZVE32F-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
+; ZVE32F-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
+; ZVE32F-NEXT: ld s2, 232(sp) # 8-byte Folded Reload
+; ZVE32F-NEXT: ld s3, 224(sp) # 8-byte Folded Reload
+; ZVE32F-NEXT: ld s4, 216(sp) # 8-byte Folded Reload
+; ZVE32F-NEXT: .cfi_restore ra
+; ZVE32F-NEXT: .cfi_restore s0
+; ZVE32F-NEXT: .cfi_restore s2
+; ZVE32F-NEXT: .cfi_restore s3
+; ZVE32F-NEXT: .cfi_restore s4
+; ZVE32F-NEXT: addi sp, sp, 256
+; ZVE32F-NEXT: .cfi_def_cfa_offset 0
+; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: unzip2a_dual_v16i64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e64, m2, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v10
+; ZIP-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; ZIP-NEXT: vid.v v8
+; ZIP-NEXT: li a0, -256
+; ZIP-NEXT: vadd.vv v8, v8, v8
+; ZIP-NEXT: vmv.s.x v0, a0
+; ZIP-NEXT: vadd.vi v8, v8, -16
+; ZIP-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; ZIP-NEXT: vrgatherei16.vv v16, v12, v8, v0.t
+; ZIP-NEXT: vmv.v.v v8, v16
+; ZIP-NEXT: ret
+entry:
+ %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ ret <16 x i64> %c
+}
+
+define <4 x i64> @unzip2a_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_range(4,4) {
+; V-LABEL: unzip2a_dual_v4i64_exact:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; V-NEXT: vmv.v.i v0, 8
+; V-NEXT: vslideup.vi v10, v9, 2
+; V-NEXT: vslideup.vi v10, v9, 1, v0.t
+; V-NEXT: vmv.v.i v0, 2
+; V-NEXT: vslidedown.vi v8, v8, 1, v0.t
+; V-NEXT: vmv.v.i v0, 12
+; V-NEXT: vmerge.vvm v8, v8, v10, v0
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: unzip2a_dual_v4i64_exact:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: ld a3, 0(a2)
+; ZVE32F-NEXT: ld a2, 16(a2)
+; ZVE32F-NEXT: ld a4, 0(a1)
+; ZVE32F-NEXT: ld a1, 16(a1)
+; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
+; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: srli a5, a2, 32
+; ZVE32F-NEXT: srli a6, a3, 32
+; ZVE32F-NEXT: srli a7, a1, 32
+; ZVE32F-NEXT: srli t0, a4, 32
+; ZVE32F-NEXT: vmv.v.x v8, a4
+; ZVE32F-NEXT: vmv.v.x v9, a3
+; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a6
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT: vs1r.v v9, (a0)
+; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: unzip2a_dual_v4i64_exact:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; ZIP-NEXT: vmv.v.i v0, 8
+; ZIP-NEXT: vslideup.vi v10, v9, 2
+; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
+; ZIP-NEXT: vmv.v.i v0, 12
+; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9
+; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT: ret
+entry:
+ %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i64> %c
+}
+
+define <4 x i64> @unzip2a_dual_v4i64_exact_nf2(<4 x i64> %a, <4 x i64> %b) vscale_range(8,8) {
+; V-LABEL: unzip2a_dual_v4i64_exact_nf2:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; V-NEXT: vmv.v.i v0, 8
+; V-NEXT: vslideup.vi v10, v9, 2
+; V-NEXT: vslideup.vi v10, v9, 1, v0.t
+; V-NEXT: vmv.v.i v0, 2
+; V-NEXT: vslidedown.vi v8, v8, 1, v0.t
+; V-NEXT: vmv.v.i v0, 12
+; V-NEXT: vmerge.vvm v8, v8, v10, v0
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: unzip2a_dual_v4i64_exact_nf2:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: ld a3, 0(a2)
+; ZVE32F-NEXT: ld a2, 16(a2)
+; ZVE32F-NEXT: ld a4, 0(a1)
+; ZVE32F-NEXT: ld a1, 16(a1)
+; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
+; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: srli a5, a2, 32
+; ZVE32F-NEXT: srli a6, a3, 32
+; ZVE32F-NEXT: srli a7, a1, 32
+; ZVE32F-NEXT: srli t0, a4, 32
+; ZVE32F-NEXT: vmv.v.x v8, a4
+; ZVE32F-NEXT: vmv.v.x v9, a3
+; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a6
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT: vse32.v v9, (a0)
+; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: unzip2a_dual_v4i64_exact_nf2:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; ZIP-NEXT: vmv.v.i v0, 8
+; ZIP-NEXT: vslideup.vi v10, v9, 2
+; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
+; ZIP-NEXT: vmv.v.i v0, 12
+; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9
+; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT: ret
+entry:
+ %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i64> %c
+}
+
+define <16 x i64> @unzip2a_dual_v16i64_exact(<16 x i64> %a, <16 x i64> %b) vscale_range(4,4) {
+; V-LABEL: unzip2a_dual_v16i64_exact:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; V-NEXT: vslideup.vi v19, v15, 2
+; V-NEXT: vmv.v.i v16, 8
+; V-NEXT: vmv.v.i v17, 2
+; V-NEXT: vmv.v.i v18, 12
+; V-NEXT: vmv.v.v v0, v16
+; V-NEXT: vslideup.vi v19, v15, 1, v0.t
+; V-NEXT: vmv.v.v v0, v17
+; V-NEXT: vslidedown.vi v14, v14, 1, v0.t
+; V-NEXT: vmv.v.v v0, v18
+; V-NEXT: vmerge.vvm v15, v14, v19, v0
+; V-NEXT: vslideup.vi v14, v13, 2
+; V-NEXT: vmv.v.v v0, v16
+; V-NEXT: vslideup.vi v14, v13, 1, v0.t
+; V-NEXT: vmv.v.v v0, v17
+; V-NEXT: vslidedown.vi v12, v12, 1, v0.t
+; V-NEXT: vmv.v.v v0, v18
+; V-NEXT: vmerge.vvm v14, v12, v14, v0
+; V-NEXT: vslideup.vi v12, v11, 2
+; V-NEXT: li a0, -256
+; V-NEXT: vmv.v.v v0, v16
+; V-NEXT: vslideup.vi v12, v11, 1, v0.t
+; V-NEXT: vmv.v.v v0, v17
+; V-NEXT: vslidedown.vi v10, v10, 1, v0.t
+; V-NEXT: vmv.v.v v0, v18
+; V-NEXT: vmerge.vvm v13, v10, v12, v0
+; V-NEXT: vslideup.vi v10, v9, 2
+; V-NEXT: vmv.v.v v0, v16
+; V-NEXT: vslideup.vi v10, v9, 1, v0.t
+; V-NEXT: vmv.v.v v0, v17
+; V-NEXT: vslidedown.vi v8, v8, 1, v0.t
+; V-NEXT: vmv.v.v v0, v18
+; V-NEXT: vmerge.vvm v12, v8, v10, v0
+; V-NEXT: vmv.s.x v0, a0
+; V-NEXT: vsetivli zero, 16, e64, m4, ta, ma
+; V-NEXT: vmerge.vvm v8, v12, v12, v0
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: unzip2a_dual_v16i64_exact:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: ld a6, 64(a1)
+; ZVE32F-NEXT: ld a4, 80(a1)
+; ZVE32F-NEXT: ld a7, 96(a1)
+; ZVE32F-NEXT: ld t0, 0(a2)
+; ZVE32F-NEXT: ld a3, 16(a2)
+; ZVE32F-NEXT: ld t1, 32(a2)
+; ZVE32F-NEXT: ld a5, 112(a1)
+; ZVE32F-NEXT: srli t2, a7, 32
+; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
+; ZVE32F-NEXT: vmv.v.x v8, a6
+; ZVE32F-NEXT: srli a6, a6, 32
+; ZVE32F-NEXT: vmv.v.x v9, a7
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; ZVE32F-NEXT: vslide1down.vx v9, v9, t2
+; ZVE32F-NEXT: ld a6, 0(a1)
+; ZVE32F-NEXT: ld a7, 16(a1)
+; ZVE32F-NEXT: ld t2, 32(a1)
+; ZVE32F-NEXT: ld a1, 48(a1)
+; ZVE32F-NEXT: vmv.v.x v10, a6
+; ZVE32F-NEXT: srli a6, a6, 32
+; ZVE32F-NEXT: vslide1down.vx v10, v10, a6
+; ZVE32F-NEXT: ld a6, 48(a2)
+; ZVE32F-NEXT: vmv.v.x v11, t1
+; ZVE32F-NEXT: srli t1, t1, 32
+; ZVE32F-NEXT: vmv.v.x v12, t0
+; ZVE32F-NEXT: srli t0, t0, 32
+; ZVE32F-NEXT: vmv.v.x v13, t2
+; ZVE32F-NEXT: srli t2, t2, 32
+; ZVE32F-NEXT: vslide1down.vx v13, v13, t2
+; ZVE32F-NEXT: vslide1down.vx v12, v12, t0
+; ZVE32F-NEXT: vslide1down.vx v11, v11, t1
+; ZVE32F-NEXT: ld t0, 64(a2)
+; ZVE32F-NEXT: ld t1, 80(a2)
+; ZVE32F-NEXT: ld t2, 96(a2)
+; ZVE32F-NEXT: ld a2, 112(a2)
+; ZVE32F-NEXT: vmv.v.x v14, t0
+; ZVE32F-NEXT: srli t0, t0, 32
+; ZVE32F-NEXT: vslide1down.vx v14, v14, t0
+; ZVE32F-NEXT: vmv.v.x v15, t2
+; ZVE32F-NEXT: srli t0, t2, 32
+; ZVE32F-NEXT: vslide1down.vx v15, v15, t0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; ZVE32F-NEXT: srli a4, a4, 32
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
+; ZVE32F-NEXT: srli a5, a5, 32
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
+; ZVE32F-NEXT: vslide1down.vx v10, v10, a7
+; ZVE32F-NEXT: srli a4, a7, 32
+; ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; ZVE32F-NEXT: vslide1down.vx v12, v12, a3
+; ZVE32F-NEXT: srli a3, a3, 32
+; ZVE32F-NEXT: vslide1down.vx v12, v12, a3
+; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v14, v14, t1
+; ZVE32F-NEXT: srli a3, t1, 32
+; ZVE32F-NEXT: vslide1down.vx v14, v14, a3
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT: vslide1down.vx v8, v13, a1
+; ZVE32F-NEXT: srli a1, a1, 32
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; ZVE32F-NEXT: vslide1down.vx v10, v11, a6
+; ZVE32F-NEXT: srli a1, a6, 32
+; ZVE32F-NEXT: vslide1down.vx v10, v10, a1
+; ZVE32F-NEXT: vslidedown.vi v10, v12, 4, v0.t
+; ZVE32F-NEXT: vslide1down.vx v11, v15, a2
+; ZVE32F-NEXT: srli a2, a2, 32
+; ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; ZVE32F-NEXT: vslidedown.vi v11, v14, 4, v0.t
+; ZVE32F-NEXT: vs4r.v v8, (a0)
+; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: unzip2a_dual_v16i64_exact:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; ZIP-NEXT: vslideup.vi v18, v15, 2
+; ZIP-NEXT: vmv.v.i v16, 8
+; ZIP-NEXT: vmv.v.i v17, 12
+; ZIP-NEXT: vslideup.vi v20, v13, 2
+; ZIP-NEXT: vmv.v.v v0, v16
+; ZIP-NEXT: vslideup.vi v18, v15, 1, v0.t
+; ZIP-NEXT: ri.vunzip2a.vv v15, v14, v19
+; ZIP-NEXT: vmv.v.v v0, v17
+; ZIP-NEXT: vmerge.vvm v15, v15, v18, v0
+; ZIP-NEXT: vmv.v.v v0, v16
+; ZIP-NEXT: vslideup.vi v20, v13, 1, v0.t
+; ZIP-NEXT: ri.vunzip2a.vv v14, v12, v13
+; ZIP-NEXT: vslideup.vi v12, v11, 2
+; ZIP-NEXT: vslideup.vi v18, v9, 2
+; ZIP-NEXT: vmv.v.v v0, v17
+; ZIP-NEXT: vmerge.vvm v14, v14, v20, v0
+; ZIP-NEXT: li a0, -256
+; ZIP-NEXT: ri.vunzip2a.vv v20, v10, v13
+; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v19
+; ZIP-NEXT: vmv.v.v v0, v16
+; ZIP-NEXT: vslideup.vi v12, v11, 1, v0.t
+; ZIP-NEXT: vmv.v.v v0, v17
+; ZIP-NEXT: vmerge.vvm v13, v20, v12, v0
+; ZIP-NEXT: vmv.v.v v0, v16
+; ZIP-NEXT: vslideup.vi v18, v9, 1, v0.t
+; ZIP-NEXT: vmv.v.v v0, v17
+; ZIP-NEXT: vmerge.vvm v12, v10, v18, v0
+; ZIP-NEXT: vmv.s.x v0, a0
+; ZIP-NEXT: vsetivli zero, 16, e64, m4, ta, ma
+; ZIP-NEXT: vmerge.vvm v8, v12, v12, v0
+; ZIP-NEXT: ret
+entry:
+ %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ ret <16 x i64> %c
+}