[llvm] [IR] Add llvm.vector.[de]interleave{4,6,8} (PR #139893)

Wed May 14 13:56:12 PDT 2025

================
@@ -542,6 +620,300 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @v
 	   ret {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} %res
 }
 
+define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @vector_deinterleave8_v16i8_v2i8(<16 x i8> %v) {
+; RV32-LABEL: vector_deinterleave8_v16i8_v2i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -48
+; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    .cfi_offset s1, -12
+; RV32-NEXT:    .cfi_offset s2, -16
+; RV32-NEXT:    .cfi_offset s3, -20
+; RV32-NEXT:    .cfi_offset s4, -24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    csrr s1, vlenb
+; RV32-NEXT:    vsetivli zero, 2, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 10
+; RV32-NEXT:    vslidedown.vi v9, v8, 8
+; RV32-NEXT:    srli s0, s1, 3
+; RV32-NEXT:    srli s2, s1, 2
+; RV32-NEXT:    add s3, s0, s0
+; RV32-NEXT:    add s4, s2, s0
+; RV32-NEXT:    vsetvli zero, s3, e8, mf2, tu, ma
+; RV32-NEXT:    vslideup.vx v9, v10, s0
+; RV32-NEXT:    vsetivli zero, 2, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 12
+; RV32-NEXT:    vsetvli zero, s4, e8, mf2, tu, ma
+; RV32-NEXT:    vslideup.vx v9, v10, s2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; RV32-NEXT:    li a1, 3
+; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    call __mulsi3
----------------
mshockwave wrote:

should we add M extension for RV32?

https://github.com/llvm/llvm-project/pull/139893