[llvm] [IA][RISCV] Add support for vp.load/vp.store with shufflevector (PR #135445)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 21 02:33:45 PDT 2025
================
@@ -616,59 +616,6 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
ret void
}
-; We only support scalable vectors for now.
-define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @not_scalable_vectors(ptr %ptr, i32 %evl) {
-; RV32-LABEL: not_scalable_vectors:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vnsrl.wx v12, v8, a0
-; RV32-NEXT: vnsrl.wi v11, v8, 0
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vnsrl.wx v10, v11, a0
-; RV32-NEXT: vnsrl.wi v8, v11, 0
-; RV32-NEXT: vnsrl.wx v11, v12, a0
-; RV32-NEXT: vnsrl.wi v9, v12, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: not_scalable_vectors:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 34
-; RV64-NEXT: srli a1, a1, 32
-; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vnsrl.wx v12, v8, a0
-; RV64-NEXT: vnsrl.wi v11, v8, 0
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vnsrl.wx v10, v11, a0
-; RV64-NEXT: vnsrl.wi v8, v11, 0
-; RV64-NEXT: vnsrl.wx v11, v12, a0
-; RV64-NEXT: vnsrl.wi v9, v12, 0
-; RV64-NEXT: ret
- %rvl = mul i32 %evl, 4
- %wide.masked.load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %rvl)
- %d0 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide.masked.load)
- %d0.0 = extractvalue { <4 x i32>, <4 x i32> } %d0, 0
- %d0.1 = extractvalue { <4 x i32>, <4 x i32> } %d0, 1
- %d1 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.0)
- %t0 = extractvalue { <2 x i32>, <2 x i32> } %d1, 0
- %t2 = extractvalue { <2 x i32>, <2 x i32> } %d1, 1
- %d2 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.1)
- %t1 = extractvalue { <2 x i32>, <2 x i32> } %d2, 0
- %t3 = extractvalue { <2 x i32>, <2 x i32> } %d2, 1
-
- %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0
- %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1
- %res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2
- %res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3
- ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3
-}
-
----------------
lukel97 wrote:
If you're planning on adding support for fixed-length vectors + interleave intrinsics, should we move this test into `fixed-vectors-interleaved-access.ll`?
By the way, do we currently canonicalize fixed-length interleave intrinsics into shuffle vectors? If not, should we? That might reduce the amount of work the InterleavedAccessPass has to do.
https://github.com/llvm/llvm-project/pull/135445
More information about the llvm-commits
mailing list