[llvm] [IR] Add llvm.vector.[de]interleave{4,6,8} (PR #139893)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Wed May 14 13:56:13 PDT 2025
================
@@ -542,6 +620,300 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @v
ret {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} %res
}
+define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @vector_deinterleave8_v16i8_v2i8(<16 x i8> %v) {
+; RV32-LABEL: vector_deinterleave8_v16i8_v2i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: .cfi_offset s1, -12
+; RV32-NEXT: .cfi_offset s2, -16
+; RV32-NEXT: .cfi_offset s3, -20
+; RV32-NEXT: .cfi_offset s4, -24
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT: csrr s1, vlenb
+; RV32-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 10
+; RV32-NEXT: vslidedown.vi v9, v8, 8
+; RV32-NEXT: srli s0, s1, 3
+; RV32-NEXT: srli s2, s1, 2
+; RV32-NEXT: add s3, s0, s0
+; RV32-NEXT: add s4, s2, s0
+; RV32-NEXT: vsetvli zero, s3, e8, mf2, tu, ma
+; RV32-NEXT: vslideup.vx v9, v10, s0
+; RV32-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 12
+; RV32-NEXT: vsetvli zero, s4, e8, mf2, tu, ma
+; RV32-NEXT: vslideup.vx v9, v10, s2
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT: li a1, 3
+; RV32-NEXT: mv a0, s0
+; RV32-NEXT: call __mulsi3
+; RV32-NEXT: add a1, a0, s0
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl1r.v v12, (a2) # vscale x 8-byte Folded Reload
+; RV32-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v12, 14
+; RV32-NEXT: vslidedown.vi v9, v12, 2
+; RV32-NEXT: vmv1r.v v10, v12
+; RV32-NEXT: vslidedown.vi v11, v12, 4
+; RV32-NEXT: vslidedown.vi v12, v12, 6
+; RV32-NEXT: srli s1, s1, 1
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl1r.v v13, (a2) # vscale x 8-byte Folded Reload
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT: vslideup.vx v13, v8, a0
+; RV32-NEXT: vsetvli zero, s3, e8, mf2, tu, ma
+; RV32-NEXT: vslideup.vx v10, v9, s0
+; RV32-NEXT: add a2, s1, s1
+; RV32-NEXT: vsetvli zero, s4, e8, mf2, tu, ma
+; RV32-NEXT: vslideup.vx v10, v11, s2
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT: vslideup.vx v10, v12, a0
+; RV32-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; RV32-NEXT: vslideup.vx v10, v13, s1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v10, (a0)
+; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; RV32-NEXT: vlseg8e8.v v8, (a0)
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa sp, 48
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: .cfi_restore s0
+; RV32-NEXT: .cfi_restore s1
+; RV32-NEXT: .cfi_restore s2
+; RV32-NEXT: .cfi_restore s3
+; RV32-NEXT: .cfi_restore s4
+; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_deinterleave8_v16i8_v2i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: .cfi_offset s1, -24
+; RV64-NEXT: .cfi_offset s2, -32
+; RV64-NEXT: .cfi_offset s3, -40
+; RV64-NEXT: .cfi_offset s4, -48
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sub sp, sp, a0
+; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 3 * vlenb
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; RV64-NEXT: csrr s1, vlenb
+; RV64-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 10
+; RV64-NEXT: vslidedown.vi v9, v8, 8
+; RV64-NEXT: srli s0, s1, 3
+; RV64-NEXT: srli s2, s1, 2
+; RV64-NEXT: add s3, s0, s0
+; RV64-NEXT: add s4, s2, s0
+; RV64-NEXT: vsetvli zero, s3, e8, mf2, tu, ma
+; RV64-NEXT: vslideup.vx v9, v10, s0
+; RV64-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 12
+; RV64-NEXT: vsetvli zero, s4, e8, mf2, tu, ma
+; RV64-NEXT: vslideup.vx v9, v10, s2
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV64-NEXT: li a1, 3
+; RV64-NEXT: mv a0, s0
+; RV64-NEXT: call __muldi3
----------------
mshockwave wrote:

ditto M extension: same point as before, this `call __muldi3` (like the `__mulsi3` call in the RV32 path above) is a scalar-multiply libcall that only appears because the M extension isn't enabled.
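For context, a minimal sketch of the IR that drives this test, assuming the new intrinsic follows the naming and type-mangling scheme of the existing llvm.vector.deinterleave2 (the actual IR body sits below the generated CHECK lines and outside this hunk; the function name here is illustrative):

; Sketch only: deinterleave8 splits one <16 x i8> into eight <2 x i8>
; results, where result j holds source elements j and j+8 (stride 8,
; offsets 0..7). The ".v16i8" suffix assumes the usual overloaded-intrinsic
; mangling on the source vector type.
define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @deinterleave8_sketch(<16 x i8> %v) {
  %res = call {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @llvm.vector.deinterleave8.v16i8(<16 x i8> %v)
  ret {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} %res
}

On the M-extension point: with +m added to -mattr in the RUN lines, the vlenb-based offset multiply would lower to a single mul instruction instead of the __mulsi3/__muldi3 libcalls (and the surrounding spill/reload traffic) checked above.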
https://github.com/llvm/llvm-project/pull/139893