[llvm] 4df2aba - [Hexagon] Calling conventions for floating point vectors
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 29 09:01:37 PST 2021
Author: Krzysztof Parzyszek
Date: 2021-12-29T09:01:07-08:00
New Revision: 4df2aba294db784546ee0bc08b41fc227b592d98
URL: https://github.com/llvm/llvm-project/commit/4df2aba294db784546ee0bc08b41fc227b592d98
DIFF: https://github.com/llvm/llvm-project/commit/4df2aba294db784546ee0bc08b41fc227b592d98.diff
LOG: [Hexagon] Calling conventions for floating point vectors
The calling conventions for floating-point HVX vectors are the same as for
the other HVX vectors, but the types need to be listed explicitly. Also, add
a detailed codegen testcase.
Co-authored-by: Abhikrant Sharma <quic_abhikran at quicinc.com>
Added:
llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll
Modified:
llvm/lib/Target/Hexagon/HexagonCallingConv.td
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
llvm/test/CodeGen/Hexagon/autohvx/splat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/llvm/lib/Target/Hexagon/HexagonCallingConv.td
index 93e17e608dd10..cc41b569e4904 100644
--- a/llvm/lib/Target/Hexagon/HexagonCallingConv.td
+++ b/llvm/lib/Target/Hexagon/HexagonCallingConv.td
@@ -126,16 +126,16 @@ def CC_Hexagon_HVX: CallingConv<[
// HVX 128-byte mode
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8],
+ CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>,
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
CCAssignToStack<128,128>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8],
+ CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
CCAssignToStack<256,128>>>,
CCDelegateTo<CC_Hexagon>
@@ -152,10 +152,10 @@ def RetCC_Hexagon_HVX: CallingConv<[
// HVX 128-byte mode
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
CCAssignToReg<[V0]>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8],
+ CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
CCAssignToReg<[W0]>>>,
CCDelegateTo<RetCC_Hexagon>
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 28f7c5414a2ad..a3a9097378e74 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -59,6 +59,7 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
+ addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
}
}
@@ -104,6 +105,9 @@ HexagonTargetLowering::initializeHVXLowering() {
// independent) handling of it would convert it to a load, which is
// not always the optimal choice.
setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom);
+ // Make concat-vectors custom to handle concats of more than 2 vectors.
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom);
}
for (MVT T : LegalV) {
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index ad80296877707..8d94a9978831e 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -265,9 +265,7 @@ let Predicates = [UseHVX] in {
// These should be preferred over a vsplat of 0.
def: Pat<(VecI8 vzero), (V6_vd0)>;
def: Pat<(VecI16 vzero), (V6_vd0)>;
- def: Pat<(VecF16 vzero), (V6_vd0)>;
def: Pat<(VecI32 vzero), (V6_vd0)>;
- def: Pat<(VecF32 vzero), (V6_vd0)>;
def: Pat<(VecPI8 vzero), (PS_vdd0)>;
def: Pat<(VecPI16 vzero), (PS_vdd0)>;
def: Pat<(VecPI32 vzero), (PS_vdd0)>;
@@ -303,7 +301,22 @@ let Predicates = [UseHVX] in {
(V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}
-let Predicates = [UseHVXFloatingPoint] in {
+let Predicates = [UseHVX, UseHVXFloatingPoint] in {
+ let AddedComplexity = 100 in {
+ def: Pat<(VecF16 vzero), (V6_vd0)>;
+ def: Pat<(VecF32 vzero), (V6_vd0)>;
+ def: Pat<(VecPF16 vzero), (PS_vdd0)>;
+ def: Pat<(VecPF32 vzero), (PS_vdd0)>;
+
+ def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>;
+ def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>;
+ }
+
+ def: Pat<(VecPF16 (concat_vectors HVF16:$Vs, HVF16:$Vt)),
+ (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
+ def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)),
+ (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
+
def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt),
(V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt),
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll
new file mode 100644
index 0000000000000..884eb6e7ac759
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll
@@ -0,0 +1,1528 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+define void @f0(<128 x i8> %a0, <128 x i8>* %a1) #0 {
+; CHECK-LABEL: f0:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a1, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ ret void
+}
+
+define void @f1(<128 x i8> %a0, <128 x i8> %a1, <128 x i8>* %a2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a2, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a2, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ ret void
+}
+
+define void @f2(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8>* %a3) #0 {
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ ret void
+}
+
+define void @f3(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8>* %a4) #0 {
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ ret void
+}
+
+define void @f4(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8>* %a5) #0 {
+; CHECK-LABEL: f4:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ ret void
+}
+
+define void @f5(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8>* %a6) #0 {
+; CHECK-LABEL: f5:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ ret void
+}
+
+define void @f6(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8>* %a7) #0 {
+; CHECK-LABEL: f6:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ ret void
+}
+
+define void @f7(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8>* %a8) #0 {
+; CHECK-LABEL: f7:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ ret void
+}
+
+define void @f8(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8>* %a9) #0 {
+; CHECK-LABEL: f8:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r0,#1024)
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r1+#0) = v8
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ ret void
+}
+
+define void @f9(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8>* %a10) #0 {
+; CHECK-LABEL: f9:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r0,#1024)
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r1+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ %v9 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 9
+ store <128 x i8> %a9, <128 x i8>* %v9, align 128
+ ret void
+}
+
+define void @f10(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8>* %a11) #0 {
+; CHECK-LABEL: f10:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r0,#1024)
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r1+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ %v9 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 9
+ store <128 x i8> %a9, <128 x i8>* %v9, align 128
+ %v10 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 10
+ store <128 x i8> %a10, <128 x i8>* %v10, align 128
+ ret void
+}
+
+define void @f11(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8>* %a12) #0 {
+; CHECK-LABEL: f11:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r0,#1024)
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r1+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ %v9 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 9
+ store <128 x i8> %a9, <128 x i8>* %v9, align 128
+ %v10 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 10
+ store <128 x i8> %a10, <128 x i8>* %v10, align 128
+ %v11 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 11
+ store <128 x i8> %a11, <128 x i8>* %v11, align 128
+ ret void
+}
+
+define void @f12(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8>* %a13) #0 {
+; CHECK-LABEL: f12:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r0,#1024)
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = add(r0,#1536)
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r1+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r5+#0) = v12
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ %v9 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 9
+ store <128 x i8> %a9, <128 x i8>* %v9, align 128
+ %v10 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 10
+ store <128 x i8> %a10, <128 x i8>* %v10, align 128
+ %v11 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 11
+ store <128 x i8> %a11, <128 x i8>* %v11, align 128
+ %v12 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 12
+ store <128 x i8> %a12, <128 x i8>* %v12, align 128
+ ret void
+}
+
+define void @f13(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8>* %a14) #0 {
+; CHECK-LABEL: f13:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r1 = add(r0,#1024)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1664)
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = add(r0,#1536)
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r1+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r2+#0) = v13
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ %v9 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 9
+ store <128 x i8> %a9, <128 x i8>* %v9, align 128
+ %v10 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 10
+ store <128 x i8> %a10, <128 x i8>* %v10, align 128
+ %v11 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 11
+ store <128 x i8> %a11, <128 x i8>* %v11, align 128
+ %v12 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 12
+ store <128 x i8> %a12, <128 x i8>* %v12, align 128
+ %v13 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 13
+ store <128 x i8> %a13, <128 x i8>* %v13, align 128
+ ret void
+}
+
+define void @f14(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8>* %a15) #0 {
+; CHECK-LABEL: f14:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: r1 = add(r0,#1024)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1792)
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = add(r0,#1664)
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = add(r0,#1536)
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r1+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v13
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r2+#0) = v14
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ %v9 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 9
+ store <128 x i8> %a9, <128 x i8>* %v9, align 128
+ %v10 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 10
+ store <128 x i8> %a10, <128 x i8>* %v10, align 128
+ %v11 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 11
+ store <128 x i8> %a11, <128 x i8>* %v11, align 128
+ %v12 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 12
+ store <128 x i8> %a12, <128 x i8>* %v12, align 128
+ %v13 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 13
+ store <128 x i8> %a13, <128 x i8>* %v13, align 128
+ %v14 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 14
+ store <128 x i8> %a14, <128 x i8>* %v14, align 128
+ ret void
+}
+
+; f15: sixteen <128 x i8> vector arguments plus a destination pointer; each
+; vector is stored to its own 128-byte slot at %a16. The expected code stores
+; straight from v0-v15 using r0 (and addresses derived from it) as the base,
+; and contains no vector load, so all sixteen vectors are received in vector
+; registers.
+define void @f15(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8> %a15, <128 x i8>* %a16) #0 {
+; CHECK-LABEL: f15:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r1 = add(r0,#1024)
+; CHECK-NEXT: r6 = add(r0,#1408)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = add(r0,#1536)
+; CHECK-NEXT: r4 = add(r0,#1664)
+; CHECK-NEXT: r2 = add(r0,#1920)
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = add(r0,#1792)
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r1+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r6+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r5+#0) = v12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v13
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r2+#0) = v15
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ %v9 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 9
+ store <128 x i8> %a9, <128 x i8>* %v9, align 128
+ %v10 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 10
+ store <128 x i8> %a10, <128 x i8>* %v10, align 128
+ %v11 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 11
+ store <128 x i8> %a11, <128 x i8>* %v11, align 128
+ %v12 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 12
+ store <128 x i8> %a12, <128 x i8>* %v12, align 128
+ %v13 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 13
+ store <128 x i8> %a13, <128 x i8>* %v13, align 128
+ %v14 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 14
+ store <128 x i8> %a14, <128 x i8>* %v14, align 128
+ %v15 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 15
+ store <128 x i8> %a15, <128 x i8>* %v15, align 128
+ ret void
+}
+
+; f16: seventeen <128 x i8> vector arguments plus a destination pointer; each
+; vector is stored to its own 128-byte slot at %a17. The expected code stores
+; the first sixteen straight from v0-v15, but loads the seventeenth into v16
+; from memory at r30+8 (after allocframe) before storing it, i.e. that
+; argument is not received in a vector register.
+define void @f16(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8> %a15, <128 x i8> %a16, <128 x i8>* %a17) #0 {
+; CHECK-LABEL: f16:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r30,#8)
+; CHECK-NEXT: r7 = add(r0,#1024)
+; CHECK-NEXT: r6 = add(r0,#1536)
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = add(r0,#1664)
+; CHECK-NEXT: r3 = add(r0,#1920)
+; CHECK-NEXT: r2 = add(r0,#2048)
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = add(r0,#1792)
+; CHECK-NEXT: r29 = and(r29,#-128)
+; CHECK-NEXT: v16 = vmem(r1+#0)
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r7+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r6+#0) = v12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r5+#0) = v13
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v15
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r2+#0) = v16
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 0
+ store <128 x i8> %a0, <128 x i8>* %v0, align 128
+ %v1 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 1
+ store <128 x i8> %a1, <128 x i8>* %v1, align 128
+ %v2 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 2
+ store <128 x i8> %a2, <128 x i8>* %v2, align 128
+ %v3 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 3
+ store <128 x i8> %a3, <128 x i8>* %v3, align 128
+ %v4 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 4
+ store <128 x i8> %a4, <128 x i8>* %v4, align 128
+ %v5 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 5
+ store <128 x i8> %a5, <128 x i8>* %v5, align 128
+ %v6 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 6
+ store <128 x i8> %a6, <128 x i8>* %v6, align 128
+ %v7 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 7
+ store <128 x i8> %a7, <128 x i8>* %v7, align 128
+ %v8 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 8
+ store <128 x i8> %a8, <128 x i8>* %v8, align 128
+ %v9 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 9
+ store <128 x i8> %a9, <128 x i8>* %v9, align 128
+ %v10 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 10
+ store <128 x i8> %a10, <128 x i8>* %v10, align 128
+ %v11 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 11
+ store <128 x i8> %a11, <128 x i8>* %v11, align 128
+ %v12 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 12
+ store <128 x i8> %a12, <128 x i8>* %v12, align 128
+ %v13 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 13
+ store <128 x i8> %a13, <128 x i8>* %v13, align 128
+ %v14 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 14
+ store <128 x i8> %a14, <128 x i8>* %v14, align 128
+ %v15 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 15
+ store <128 x i8> %a15, <128 x i8>* %v15, align 128
+ %v16 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 16
+ store <128 x i8> %a16, <128 x i8>* %v16, align 128
+ ret void
+}
+
+; f17: seventeen <64 x i16> vector arguments plus a destination pointer; each
+; vector is stored to its own 128-byte slot at %a17. The expected code stores
+; the first sixteen straight from v0-v15, but loads the seventeenth into v16
+; from memory at r30+8 (after allocframe) before storing it, i.e. that
+; argument is not received in a vector register.
+define void @f17(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2, <64 x i16> %a3, <64 x i16> %a4, <64 x i16> %a5, <64 x i16> %a6, <64 x i16> %a7, <64 x i16> %a8, <64 x i16> %a9, <64 x i16> %a10, <64 x i16> %a11, <64 x i16> %a12, <64 x i16> %a13, <64 x i16> %a14, <64 x i16> %a15, <64 x i16> %a16, <64 x i16>* %a17) #0 {
+; CHECK-LABEL: f17:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r30,#8)
+; CHECK-NEXT: r7 = add(r0,#1024)
+; CHECK-NEXT: r6 = add(r0,#1536)
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = add(r0,#1664)
+; CHECK-NEXT: r3 = add(r0,#1920)
+; CHECK-NEXT: r2 = add(r0,#2048)
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = add(r0,#1792)
+; CHECK-NEXT: r29 = and(r29,#-128)
+; CHECK-NEXT: v16 = vmem(r1+#0)
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r7+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r6+#0) = v12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r5+#0) = v13
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v15
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r2+#0) = v16
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 0
+ store <64 x i16> %a0, <64 x i16>* %v0, align 128
+ %v1 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 1
+ store <64 x i16> %a1, <64 x i16>* %v1, align 128
+ %v2 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 2
+ store <64 x i16> %a2, <64 x i16>* %v2, align 128
+ %v3 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 3
+ store <64 x i16> %a3, <64 x i16>* %v3, align 128
+ %v4 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 4
+ store <64 x i16> %a4, <64 x i16>* %v4, align 128
+ %v5 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 5
+ store <64 x i16> %a5, <64 x i16>* %v5, align 128
+ %v6 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 6
+ store <64 x i16> %a6, <64 x i16>* %v6, align 128
+ %v7 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 7
+ store <64 x i16> %a7, <64 x i16>* %v7, align 128
+ %v8 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 8
+ store <64 x i16> %a8, <64 x i16>* %v8, align 128
+ %v9 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 9
+ store <64 x i16> %a9, <64 x i16>* %v9, align 128
+ %v10 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 10
+ store <64 x i16> %a10, <64 x i16>* %v10, align 128
+ %v11 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 11
+ store <64 x i16> %a11, <64 x i16>* %v11, align 128
+ %v12 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 12
+ store <64 x i16> %a12, <64 x i16>* %v12, align 128
+ %v13 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 13
+ store <64 x i16> %a13, <64 x i16>* %v13, align 128
+ %v14 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 14
+ store <64 x i16> %a14, <64 x i16>* %v14, align 128
+ %v15 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 15
+ store <64 x i16> %a15, <64 x i16>* %v15, align 128
+ %v16 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 16
+ store <64 x i16> %a16, <64 x i16>* %v16, align 128
+ ret void
+}
+
+; f18: seventeen <32 x i32> vector arguments plus a destination pointer; each
+; vector is stored to its own 128-byte slot at %a17. The expected code stores
+; the first sixteen straight from v0-v15, but loads the seventeenth into v16
+; from memory at r30+8 (after allocframe) before storing it, i.e. that
+; argument is not received in a vector register.
+define void @f18(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2, <32 x i32> %a3, <32 x i32> %a4, <32 x i32> %a5, <32 x i32> %a6, <32 x i32> %a7, <32 x i32> %a8, <32 x i32> %a9, <32 x i32> %a10, <32 x i32> %a11, <32 x i32> %a12, <32 x i32> %a13, <32 x i32> %a14, <32 x i32> %a15, <32 x i32> %a16, <32 x i32>* %a17) #0 {
+; CHECK-LABEL: f18:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r30,#8)
+; CHECK-NEXT: r7 = add(r0,#1024)
+; CHECK-NEXT: r6 = add(r0,#1536)
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = add(r0,#1664)
+; CHECK-NEXT: r3 = add(r0,#1920)
+; CHECK-NEXT: r2 = add(r0,#2048)
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = add(r0,#1792)
+; CHECK-NEXT: r29 = and(r29,#-128)
+; CHECK-NEXT: v16 = vmem(r1+#0)
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r7+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r6+#0) = v12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r5+#0) = v13
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v15
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r2+#0) = v16
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 0
+ store <32 x i32> %a0, <32 x i32>* %v0, align 128
+ %v1 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 1
+ store <32 x i32> %a1, <32 x i32>* %v1, align 128
+ %v2 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 2
+ store <32 x i32> %a2, <32 x i32>* %v2, align 128
+ %v3 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 3
+ store <32 x i32> %a3, <32 x i32>* %v3, align 128
+ %v4 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 4
+ store <32 x i32> %a4, <32 x i32>* %v4, align 128
+ %v5 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 5
+ store <32 x i32> %a5, <32 x i32>* %v5, align 128
+ %v6 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 6
+ store <32 x i32> %a6, <32 x i32>* %v6, align 128
+ %v7 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 7
+ store <32 x i32> %a7, <32 x i32>* %v7, align 128
+ %v8 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 8
+ store <32 x i32> %a8, <32 x i32>* %v8, align 128
+ %v9 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 9
+ store <32 x i32> %a9, <32 x i32>* %v9, align 128
+ %v10 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 10
+ store <32 x i32> %a10, <32 x i32>* %v10, align 128
+ %v11 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 11
+ store <32 x i32> %a11, <32 x i32>* %v11, align 128
+ %v12 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 12
+ store <32 x i32> %a12, <32 x i32>* %v12, align 128
+ %v13 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 13
+ store <32 x i32> %a13, <32 x i32>* %v13, align 128
+ %v14 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 14
+ store <32 x i32> %a14, <32 x i32>* %v14, align 128
+ %v15 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 15
+ store <32 x i32> %a15, <32 x i32>* %v15, align 128
+ %v16 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 16
+ store <32 x i32> %a16, <32 x i32>* %v16, align 128
+ ret void
+}
+
+; f19: seventeen <64 x half> vector arguments plus a destination pointer; each
+; vector is stored to its own 128-byte slot at %a17. The expected code stores
+; the first sixteen straight from v0-v15, but loads the seventeenth into v16
+; from memory at r30+8 (after allocframe) before storing it, i.e. that
+; argument is not received in a vector register.
+define void @f19(<64 x half> %a0, <64 x half> %a1, <64 x half> %a2, <64 x half> %a3, <64 x half> %a4, <64 x half> %a5, <64 x half> %a6, <64 x half> %a7, <64 x half> %a8, <64 x half> %a9, <64 x half> %a10, <64 x half> %a11, <64 x half> %a12, <64 x half> %a13, <64 x half> %a14, <64 x half> %a15, <64 x half> %a16, <64 x half>* %a17) #0 {
+; CHECK-LABEL: f19:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r30,#8)
+; CHECK-NEXT: r7 = add(r0,#1024)
+; CHECK-NEXT: r6 = add(r0,#1536)
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = add(r0,#1664)
+; CHECK-NEXT: r3 = add(r0,#1920)
+; CHECK-NEXT: r2 = add(r0,#2048)
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = add(r0,#1792)
+; CHECK-NEXT: r29 = and(r29,#-128)
+; CHECK-NEXT: v16 = vmem(r1+#0)
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r7+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r6+#0) = v12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r5+#0) = v13
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v15
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r2+#0) = v16
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <64 x half>, <64 x half>* %a17, i32 0
+ store <64 x half> %a0, <64 x half>* %v0, align 128
+ %v1 = getelementptr <64 x half>, <64 x half>* %a17, i32 1
+ store <64 x half> %a1, <64 x half>* %v1, align 128
+ %v2 = getelementptr <64 x half>, <64 x half>* %a17, i32 2
+ store <64 x half> %a2, <64 x half>* %v2, align 128
+ %v3 = getelementptr <64 x half>, <64 x half>* %a17, i32 3
+ store <64 x half> %a3, <64 x half>* %v3, align 128
+ %v4 = getelementptr <64 x half>, <64 x half>* %a17, i32 4
+ store <64 x half> %a4, <64 x half>* %v4, align 128
+ %v5 = getelementptr <64 x half>, <64 x half>* %a17, i32 5
+ store <64 x half> %a5, <64 x half>* %v5, align 128
+ %v6 = getelementptr <64 x half>, <64 x half>* %a17, i32 6
+ store <64 x half> %a6, <64 x half>* %v6, align 128
+ %v7 = getelementptr <64 x half>, <64 x half>* %a17, i32 7
+ store <64 x half> %a7, <64 x half>* %v7, align 128
+ %v8 = getelementptr <64 x half>, <64 x half>* %a17, i32 8
+ store <64 x half> %a8, <64 x half>* %v8, align 128
+ %v9 = getelementptr <64 x half>, <64 x half>* %a17, i32 9
+ store <64 x half> %a9, <64 x half>* %v9, align 128
+ %v10 = getelementptr <64 x half>, <64 x half>* %a17, i32 10
+ store <64 x half> %a10, <64 x half>* %v10, align 128
+ %v11 = getelementptr <64 x half>, <64 x half>* %a17, i32 11
+ store <64 x half> %a11, <64 x half>* %v11, align 128
+ %v12 = getelementptr <64 x half>, <64 x half>* %a17, i32 12
+ store <64 x half> %a12, <64 x half>* %v12, align 128
+ %v13 = getelementptr <64 x half>, <64 x half>* %a17, i32 13
+ store <64 x half> %a13, <64 x half>* %v13, align 128
+ %v14 = getelementptr <64 x half>, <64 x half>* %a17, i32 14
+ store <64 x half> %a14, <64 x half>* %v14, align 128
+ %v15 = getelementptr <64 x half>, <64 x half>* %a17, i32 15
+ store <64 x half> %a15, <64 x half>* %v15, align 128
+ %v16 = getelementptr <64 x half>, <64 x half>* %a17, i32 16
+ store <64 x half> %a16, <64 x half>* %v16, align 128
+ ret void
+}
+
+; f20: seventeen <32 x float> vector arguments plus a destination pointer;
+; each vector is stored to its own 128-byte slot at %a17. The expected code
+; stores the first sixteen straight from v0-v15, but loads the seventeenth
+; into v16 from memory at r30+8 (after allocframe) before storing it, i.e.
+; that argument is not received in a vector register.
+define void @f20(<32 x float> %a0, <32 x float> %a1, <32 x float> %a2, <32 x float> %a3, <32 x float> %a4, <32 x float> %a5, <32 x float> %a6, <32 x float> %a7, <32 x float> %a8, <32 x float> %a9, <32 x float> %a10, <32 x float> %a11, <32 x float> %a12, <32 x float> %a13, <32 x float> %a14, <32 x float> %a15, <32 x float> %a16, <32 x float>* %a17) #0 {
+; CHECK-LABEL: f20:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = add(r0,#1152)
+; CHECK-NEXT: r3 = add(r0,#1280)
+; CHECK-NEXT: r4 = add(r0,#1408)
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = add(r30,#8)
+; CHECK-NEXT: r7 = add(r0,#1024)
+; CHECK-NEXT: r6 = add(r0,#1536)
+; CHECK-NEXT: vmem(r2+#0) = v9
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = add(r0,#1664)
+; CHECK-NEXT: r3 = add(r0,#1920)
+; CHECK-NEXT: r2 = add(r0,#2048)
+; CHECK-NEXT: vmem(r3+#0) = v10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = add(r0,#1792)
+; CHECK-NEXT: r29 = and(r29,#-128)
+; CHECK-NEXT: v16 = vmem(r1+#0)
+; CHECK-NEXT: vmem(r4+#0) = v11
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#1) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r7+#0) = v8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#2) = v2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#3) = v3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#4) = v4
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r6+#0) = v12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#5) = v5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r5+#0) = v13
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#6) = v6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r4+#0) = v14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r0+#7) = v7
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r3+#0) = v15
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmem(r2+#0) = v16
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+b0:
+ %v0 = getelementptr <32 x float>, <32 x float>* %a17, i32 0
+ store <32 x float> %a0, <32 x float>* %v0, align 128
+ %v1 = getelementptr <32 x float>, <32 x float>* %a17, i32 1
+ store <32 x float> %a1, <32 x float>* %v1, align 128
+ %v2 = getelementptr <32 x float>, <32 x float>* %a17, i32 2
+ store <32 x float> %a2, <32 x float>* %v2, align 128
+ %v3 = getelementptr <32 x float>, <32 x float>* %a17, i32 3
+ store <32 x float> %a3, <32 x float>* %v3, align 128
+ %v4 = getelementptr <32 x float>, <32 x float>* %a17, i32 4
+ store <32 x float> %a4, <32 x float>* %v4, align 128
+ %v5 = getelementptr <32 x float>, <32 x float>* %a17, i32 5
+ store <32 x float> %a5, <32 x float>* %v5, align 128
+ %v6 = getelementptr <32 x float>, <32 x float>* %a17, i32 6
+ store <32 x float> %a6, <32 x float>* %v6, align 128
+ %v7 = getelementptr <32 x float>, <32 x float>* %a17, i32 7
+ store <32 x float> %a7, <32 x float>* %v7, align 128
+ %v8 = getelementptr <32 x float>, <32 x float>* %a17, i32 8
+ store <32 x float> %a8, <32 x float>* %v8, align 128
+ %v9 = getelementptr <32 x float>, <32 x float>* %a17, i32 9
+ store <32 x float> %a9, <32 x float>* %v9, align 128
+ %v10 = getelementptr <32 x float>, <32 x float>* %a17, i32 10
+ store <32 x float> %a10, <32 x float>* %v10, align 128
+ %v11 = getelementptr <32 x float>, <32 x float>* %a17, i32 11
+ store <32 x float> %a11, <32 x float>* %v11, align 128
+ %v12 = getelementptr <32 x float>, <32 x float>* %a17, i32 12
+ store <32 x float> %a12, <32 x float>* %v12, align 128
+ %v13 = getelementptr <32 x float>, <32 x float>* %a17, i32 13
+ store <32 x float> %a13, <32 x float>* %v13, align 128
+ %v14 = getelementptr <32 x float>, <32 x float>* %a17, i32 14
+ store <32 x float> %a14, <32 x float>* %v14, align 128
+ %v15 = getelementptr <32 x float>, <32 x float>* %a17, i32 15
+ store <32 x float> %a15, <32 x float>* %v15, align 128
+ %v16 = getelementptr <32 x float>, <32 x float>* %a17, i32 16
+ store <32 x float> %a16, <32 x float>* %v16, align 128
+ ret void
+}
+
+; f21: returns a <128 x i8> splat of zero (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in v0 with
+; vxor(v0,v0) and returns, so the result is handed back in v0.
+define <128 x i8> @f21() #0 {
+; CHECK-LABEL: f21:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vxor(v0,v0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <128 x i8> undef, i8 0, i32 0
+ %v1 = shufflevector <128 x i8> %v0, <128 x i8> undef, <128 x i32> zeroinitializer
+ ret <128 x i8> %v1
+}
+
+; f22: returns a <256 x i8> splat of zero (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in the
+; vector pair v1:0 by subtracting it from itself and returns, so the result
+; is handed back in v1:0.
+define <256 x i8> @f22() #0 {
+; CHECK-LABEL: f22:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <256 x i8> undef, i8 0, i32 0
+ %v1 = shufflevector <256 x i8> %v0, <256 x i8> undef, <256 x i32> zeroinitializer
+ ret <256 x i8> %v1
+}
+
+; f23: returns a <64 x i16> splat of zero (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in v0 with
+; vxor(v0,v0) and returns, so the result is handed back in v0.
+define <64 x i16> @f23() #0 {
+; CHECK-LABEL: f23:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vxor(v0,v0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <64 x i16> undef, i16 0, i32 0
+ %v1 = shufflevector <64 x i16> %v0, <64 x i16> undef, <64 x i32> zeroinitializer
+ ret <64 x i16> %v1
+}
+
+; f24: returns a <128 x i16> splat of zero (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in the
+; vector pair v1:0 by subtracting it from itself and returns, so the result
+; is handed back in v1:0.
+define <128 x i16> @f24() #0 {
+; CHECK-LABEL: f24:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <128 x i16> undef, i16 0, i32 0
+ %v1 = shufflevector <128 x i16> %v0, <128 x i16> undef, <128 x i32> zeroinitializer
+ ret <128 x i16> %v1
+}
+
+; f25: returns a <32 x i32> splat of zero (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in v0 with
+; vxor(v0,v0) and returns, so the result is handed back in v0.
+define <32 x i32> @f25() #0 {
+; CHECK-LABEL: f25:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vxor(v0,v0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <32 x i32> undef, i32 0, i32 0
+ %v1 = shufflevector <32 x i32> %v0, <32 x i32> undef, <32 x i32> zeroinitializer
+ ret <32 x i32> %v1
+}
+
+; f26: returns a <64 x i32> splat of zero (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in the
+; vector pair v1:0 by subtracting it from itself and returns, so the result
+; is handed back in v1:0.
+define <64 x i32> @f26() #0 {
+; CHECK-LABEL: f26:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <64 x i32> undef, i32 0, i32 0
+ %v1 = shufflevector <64 x i32> %v0, <64 x i32> undef, <64 x i32> zeroinitializer
+ ret <64 x i32> %v1
+}
+
+; f27: returns a <64 x half> splat of 0xH0 (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in v0 with
+; vxor(v0,v0) and returns, so the floating-point vector result is handed back
+; in v0.
+define <64 x half> @f27() #0 {
+; CHECK-LABEL: f27:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vxor(v0,v0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <64 x half> undef, half 0xH0, i32 0
+ %v1 = shufflevector <64 x half> %v0, <64 x half> undef, <64 x i32> zeroinitializer
+ ret <64 x half> %v1
+}
+
+; f28: returns a <128 x half> splat of 0xH0 (insertelement + shufflevector
+; with a zeroinitializer mask). The expected code materializes the zero in
+; the vector pair v1:0 by subtracting it from itself and returns, so the
+; floating-point vector result is handed back in v1:0.
+define <128 x half> @f28() #0 {
+; CHECK-LABEL: f28:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <128 x half> undef, half 0xH0, i32 0
+ %v1 = shufflevector <128 x half> %v0, <128 x half> undef, <128 x i32> zeroinitializer
+ ret <128 x half> %v1
+}
+
+; f29: returns a <32 x float> splat of 0.0 (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in v0 with
+; vxor(v0,v0) and returns, so the floating-point vector result is handed back
+; in v0.
+define <32 x float> @f29() #0 {
+; CHECK-LABEL: f29:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vxor(v0,v0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <32 x float> undef, float 0.0, i32 0
+ %v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer
+ ret <32 x float> %v1
+}
+
+; f30: returns a <64 x float> splat of 0.0 (insertelement + shufflevector with
+; a zeroinitializer mask). The expected code materializes the zero in the
+; vector pair v1:0 by subtracting it from itself and returns, so the
+; floating-point vector result is handed back in v1:0.
+define <64 x float> @f30() #0 {
+; CHECK-LABEL: f30:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = insertelement <64 x float> undef, float 0.0, i32 0
+ %v1 = shufflevector <64 x float> %v0, <64 x float> undef, <64 x i32> zeroinitializer
+ ret <64 x float> %v1
+}
+
+; Attribute group #0, referenced by the functions above: nounwind, target CPU
+; hexagonv69, with the hvxv69, hvx-length128b and hvx-qfloat target features
+; enabled.
+attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll
index bbea3a21270c5..eea089851e9ca 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll
@@ -402,9 +402,8 @@ define <64 x half> @f24(i16 %a0) #2 {
; CHECK-LABEL: f24:
; CHECK: // %bb.0:
; CHECK-NEXT: {
-; CHECK-NEXT: v0.h = vsplat(r1)
+; CHECK-NEXT: v0.h = vsplat(r0)
; CHECK-NEXT: jumpr r31
-; CHECK-NEXT: vmem(r0+#0) = v0.new
; CHECK-NEXT: }
%v0 = bitcast i16 %a0 to half
%v1 = insertelement <64 x half> undef, half %v0, i32 0
@@ -417,9 +416,8 @@ define <32 x float> @f25(float %a0) #2 {
; CHECK-LABEL: f25:
; CHECK: // %bb.0:
; CHECK-NEXT: {
-; CHECK-NEXT: v0 = vsplat(r1)
+; CHECK-NEXT: v0 = vsplat(r0)
; CHECK-NEXT: jumpr r31
-; CHECK-NEXT: vmem(r0+#0) = v0.new
; CHECK-NEXT: }
%v0 = insertelement <32 x float> undef, float %a0, i32 0
%v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer
More information about the llvm-commits mailing list