[llvm] 841c8d4 - [LV] Add tests for more interleave group factors on AArch64 and RISC-V. NFC
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon May 26 10:22:24 PDT 2025
Author: Luke Lau
Date: 2025-05-26T18:21:27+01:00
New Revision: 841c8d48a62dc62bf8a23883225fd88d6848e45c
URL: https://github.com/llvm/llvm-project/commit/841c8d48a62dc62bf8a23883225fd88d6848e45c
DIFF: https://github.com/llvm/llvm-project/commit/841c8d48a62dc62bf8a23883225fd88d6848e45c.diff
LOG: [LV] Add tests for more interleave group factors on AArch64 and RISC-V. NFC
The plan is to eventually add support for scalably vectorizing these for
non-power-of-2 factors, see https://github.com/llvm/llvm-project/pull/139893
Simultaneously, we need to add a test to make sure we don't generate
@llvm.vector.[de]interleave3 for AArch64 if we can't lower it (yet)
Added:
Modified:
llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index fe68501ad94f4..6861644fc9969 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -1353,6 +1353,103 @@ end:
ret void
}
+; Check vectorization on an interleaved load/store groups of factor 3
+
+; for (int i = 0; i < 1024; ++i) {
+; dst[i].x = a[i].x + b[i].x;
+; dst[i].y = a[i].y - b[i].y;
+; dst[i].z = a[i].z << b[i].z;
+; }
+;
+; TODO: Support scalable interleave groups once we can also codegen
+; @llvm.[de]interleave3
+%struct.xyz = type { i32, i32, i32 }
+
+define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr readonly %a, ptr readonly %b) {
+; CHECK-LABEL: @interleave_deinterleave_factor3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP1]], 1024
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_XYZ:%.*]], ptr [[A:%.*]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_XYZ]], ptr [[B:%.*]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP14:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_XYZ]], ptr [[DST:%.*]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP14]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, <vscale x 4 x ptr> [[TMP19]], i64 4
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, <vscale x 4 x ptr> [[TMP20]], i64 4
+; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP22]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP16:%.*]] = sub nsw <vscale x 4 x i32> [[TMP8]], [[TMP12]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, <vscale x 4 x ptr> [[TMP10]], i64 4
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP23]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, <vscale x 4 x ptr> [[TMP19]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, <vscale x 4 x ptr> [[TMP20]], i64 8
+; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP24]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP17:%.*]] = shl <vscale x 4 x i32> [[TMP9]], [[TMP13]]
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, <vscale x 4 x ptr> [[TMP10]], i64 8
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP17]], <vscale x 4 x ptr> [[TMP25]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds %struct.xyz, ptr %a, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.xyz, ptr %b, i64 %indvars.iv
+ %1 = load i32, ptr %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %arrayidx5 = getelementptr inbounds %struct.xyz, ptr %dst, i64 %indvars.iv
+ store i32 %add, ptr %arrayidx5, align 4
+ %y = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4
+ %2 = load i32, ptr %y, align 4
+ %y11 = getelementptr inbounds nuw i8, ptr %arrayidx2, i64 4
+ %3 = load i32, ptr %y11, align 4
+ %sub = sub nsw i32 %2, %3
+ %y14 = getelementptr inbounds nuw i8, ptr %arrayidx5, i64 4
+ store i32 %sub, ptr %y14, align 4
+ %z = getelementptr inbounds nuw i8, ptr %arrayidx, i64 8
+ %4 = load i32, ptr %z, align 4
+ %z19 = getelementptr inbounds nuw i8, ptr %arrayidx2, i64 8
+ %5 = load i32, ptr %z19, align 4
+ %shl = shl i32 %4, %5
+ %z22 = getelementptr inbounds nuw i8, ptr %arrayidx5, i64 8
+ store i32 %shl, ptr %z22, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
; Check vectorization on an interleaved load/store groups of factor 4
; for (int i = 0; i < 1024; ++i) {
@@ -1413,7 +1510,7 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a
; CHECK-NEXT: store <vscale x 16 x i32> [[INTERLEAVED_VEC13]], ptr [[TMP21]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -1532,7 +1629,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index 7d1d2e820a57f..f48691bd54417 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -365,17 +365,17 @@ define void @load_store_factor3_i32(ptr %p) {
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
-; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
-; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP8]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP7]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
; CHECK-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], 8
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -469,17 +469,17 @@ define void @load_store_factor3_i32(ptr %p) {
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; SCALABLE-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
-; SCALABLE-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
-; SCALABLE-NEXT: [[TMP5:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
-; SCALABLE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SCALABLE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SCALABLE-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP8]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; SCALABLE-NEXT: [[TMP2:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; SCALABLE-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; SCALABLE-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
+; SCALABLE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SCALABLE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SCALABLE-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP7]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
; SCALABLE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], 8
-; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
@@ -552,17 +552,17 @@ define void @load_store_factor3_i64(ptr %p) {
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
-; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP8]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> [[TMP6]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP7]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -656,17 +656,17 @@ define void @load_store_factor3_i64(ptr %p) {
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; SCALABLE-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
-; SCALABLE-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; SCALABLE-NEXT: [[TMP5:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
-; SCALABLE-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SCALABLE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; SCALABLE-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP8]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; SCALABLE-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; SCALABLE-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; SCALABLE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SCALABLE-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SCALABLE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> [[TMP6]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP7]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; SCALABLE-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], 4
-; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
@@ -725,6 +725,1082 @@ exit:
ret void
}
+define void @load_store_factor4(ptr %p) {
+; CHECK-LABEL: @load_store_factor4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> [[WIDE_VEC]])
+; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP8]])
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 1)
+; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], splat (i64 2)
+; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP12]], splat (i64 3)
+; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i64> [[TMP13]], splat (i64 4)
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x i64> [[TMP16]])
+; CHECK-NEXT: [[INTERLEAVED_VEC3:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP15]], <vscale x 2 x i64> [[TMP17]])
+; CHECK-NEXT: [[INTERLEAVED_VEC4:%.*]] = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> [[INTERLEAVED_VEC]], <vscale x 4 x i64> [[INTERLEAVED_VEC3]])
+; CHECK-NEXT: store <vscale x 8 x i64> [[INTERLEAVED_VEC4]], ptr [[TMP7]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 4
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+; FIXED-LABEL: @load_store_factor4(
+; FIXED-NEXT: entry:
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED: vector.ph:
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
+; FIXED: vector.body:
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 4
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP1]], align 8
+; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; FIXED-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT: [[TMP5:%.*]] = add <4 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; FIXED-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP8]], <16 x i64> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; FIXED-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; FIXED: middle.block:
+; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED: scalar.ph:
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED: loop:
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 4
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; FIXED: exit:
+; FIXED-NEXT: ret void
+;
+; SCALABLE-LABEL: @load_store_factor4(
+; SCALABLE-NEXT: entry:
+; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE: vector.ph:
+; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
+; SCALABLE: vector.body:
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 [[INDEX]], 4
+; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[TMP7]], align 8
+; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> [[WIDE_VEC]])
+; SCALABLE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP8]])
+; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP9]])
+; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 0
+; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 0
+; SCALABLE-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 1
+; SCALABLE-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 1
+; SCALABLE-NEXT: [[TMP14:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 1)
+; SCALABLE-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], splat (i64 2)
+; SCALABLE-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP12]], splat (i64 3)
+; SCALABLE-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i64> [[TMP13]], splat (i64 4)
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x i64> [[TMP16]])
+; SCALABLE-NEXT: [[INTERLEAVED_VEC3:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP15]], <vscale x 2 x i64> [[TMP17]])
+; SCALABLE-NEXT: [[INTERLEAVED_VEC4:%.*]] = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> [[INTERLEAVED_VEC]], <vscale x 4 x i64> [[INTERLEAVED_VEC3]])
+; SCALABLE-NEXT: store <vscale x 8 x i64> [[INTERLEAVED_VEC4]], ptr [[TMP7]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; SCALABLE: middle.block:
+; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE: scalar.ph:
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE: loop:
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 4
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; SCALABLE: exit:
+; SCALABLE-NEXT: ret void
+;
+entry:
+ br label %loop
+loop:
+ %i = phi i64 [0, %entry], [%nexti, %loop]
+
+ %offset0 = mul i64 %i, 4
+ %q0 = getelementptr i64, ptr %p, i64 %offset0
+ %x0 = load i64, ptr %q0
+ %y0 = add i64 %x0, 1
+ store i64 %y0, ptr %q0
+
+ %offset1 = add i64 %offset0, 1
+ %q1 = getelementptr i64, ptr %p, i64 %offset1
+ %x1 = load i64, ptr %q1
+ %y1 = add i64 %x1, 2
+ store i64 %y1, ptr %q1
+
+ %offset2 = add i64 %offset1, 1
+ %q2 = getelementptr i64, ptr %p, i64 %offset2
+ %x2 = load i64, ptr %q2
+ %y2 = add i64 %x2, 3
+ store i64 %y2, ptr %q2
+
+ %offset3 = add i64 %offset2, 1
+ %q3 = getelementptr i64, ptr %p, i64 %offset3
+ %x3 = load i64, ptr %q3
+ %y3 = add i64 %x3, 4
+ store i64 %y3, ptr %q3
+
+ %nexti = add i64 %i, 1
+ %done = icmp eq i64 %nexti, 1024
+ br i1 %done, label %exit, label %loop
+exit:
+ ret void
+}
+
+define void @load_store_factor5(ptr %p) {
+; CHECK-LABEL: @load_store_factor5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <10 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 0, i32 5>
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 1, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 2, i32 7>
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 3, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 4, i32 9>
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <10 x i64> [[TMP11]], <10 x i64> poison, <10 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 1, i32 3, i32 5, i32 7, i32 9>
+; CHECK-NEXT: store <10 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 5
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; CHECK-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; CHECK-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; CHECK-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; CHECK-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; CHECK-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+; FIXED-LABEL: @load_store_factor5(
+; FIXED-NEXT: entry:
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED: vector.ph:
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
+; FIXED: vector.body:
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 5
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <10 x i64>, ptr [[TMP1]], align 8
+; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 0, i32 5>
+; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 1, i32 6>
+; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 2, i32 7>
+; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 3, i32 8>
+; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 4, i32 9>
+; FIXED-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; FIXED-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <10 x i64> [[TMP11]], <10 x i64> poison, <10 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 1, i32 3, i32 5, i32 7, i32 9>
+; FIXED-NEXT: store <10 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; FIXED: middle.block:
+; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED: scalar.ph:
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED: loop:
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 5
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; FIXED-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; FIXED-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; FIXED-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; FIXED-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; FIXED-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; FIXED: exit:
+; FIXED-NEXT: ret void
+;
+; SCALABLE-LABEL: @load_store_factor5(
+; SCALABLE-NEXT: entry:
+; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE: vector.ph:
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
+; SCALABLE: vector.body:
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 5
+; SCALABLE-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <10 x i64>, ptr [[TMP1]], align 8
+; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 0, i32 5>
+; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 1, i32 6>
+; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 2, i32 7>
+; SCALABLE-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 3, i32 8>
+; SCALABLE-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <10 x i64> [[WIDE_VEC]], <10 x i64> poison, <2 x i32> <i32 4, i32 9>
+; SCALABLE-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; SCALABLE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; SCALABLE-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; SCALABLE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; SCALABLE-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SCALABLE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SCALABLE-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <10 x i64> [[TMP11]], <10 x i64> poison, <10 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 1, i32 3, i32 5, i32 7, i32 9>
+; SCALABLE-NEXT: store <10 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; SCALABLE: middle.block:
+; SCALABLE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE: scalar.ph:
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE: loop:
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 5
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; SCALABLE-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; SCALABLE-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; SCALABLE-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; SCALABLE-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; SCALABLE: exit:
+; SCALABLE-NEXT: ret void
+;
+entry:
+ br label %loop
+loop:
+ %i = phi i64 [0, %entry], [%nexti, %loop]
+
+ %offset0 = mul i64 %i, 5
+ %q0 = getelementptr i64, ptr %p, i64 %offset0
+ %x0 = load i64, ptr %q0
+ %y0 = add i64 %x0, 1
+ store i64 %y0, ptr %q0
+
+ %offset1 = add i64 %offset0, 1
+ %q1 = getelementptr i64, ptr %p, i64 %offset1
+ %x1 = load i64, ptr %q1
+ %y1 = add i64 %x1, 2
+ store i64 %y1, ptr %q1
+
+ %offset2 = add i64 %offset1, 1
+ %q2 = getelementptr i64, ptr %p, i64 %offset2
+ %x2 = load i64, ptr %q2
+ %y2 = add i64 %x2, 3
+ store i64 %y2, ptr %q2
+
+ %offset3 = add i64 %offset2, 1
+ %q3 = getelementptr i64, ptr %p, i64 %offset3
+ %x3 = load i64, ptr %q3
+ %y3 = add i64 %x3, 4
+ store i64 %y3, ptr %q3
+
+ %offset4 = add i64 %offset3, 1
+ %q4 = getelementptr i64, ptr %p, i64 %offset4
+ %x4 = load i64, ptr %q4
+ %y4 = add i64 %x4, 5
+ store i64 %y4, ptr %q4
+
+ %nexti = add i64 %i, 1
+ %done = icmp eq i64 %nexti, 1024
+ br i1 %done, label %exit, label %loop
+exit:
+ ret void
+}
+
+define void @load_store_factor6(ptr %p) {
+; CHECK-LABEL: @load_store_factor6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 6
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 0, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 1, i32 7>
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 2, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 3, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 4, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 5, i32 11>
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
+; CHECK-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 6
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; CHECK-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; CHECK-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; CHECK-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; CHECK-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; CHECK-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; CHECK-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
+; CHECK-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
+; CHECK-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8
+; CHECK-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6
+; CHECK-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+; FIXED-LABEL: @load_store_factor6(
+; FIXED-NEXT: entry:
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED: vector.ph:
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
+; FIXED: vector.body:
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 6
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP1]], align 8
+; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 0, i32 6>
+; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 1, i32 7>
+; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 2, i32 8>
+; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 3, i32 9>
+; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 4, i32 10>
+; FIXED-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 5, i32 11>
+; FIXED-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; FIXED-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; FIXED-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
+; FIXED-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; FIXED: middle.block:
+; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED: scalar.ph:
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED: loop:
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 6
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; FIXED-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; FIXED-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; FIXED-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; FIXED-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; FIXED-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; FIXED-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
+; FIXED-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
+; FIXED-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8
+; FIXED-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6
+; FIXED-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; FIXED: exit:
+; FIXED-NEXT: ret void
+;
+; SCALABLE-LABEL: @load_store_factor6(
+; SCALABLE-NEXT: entry:
+; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE: vector.ph:
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
+; SCALABLE: vector.body:
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 6
+; SCALABLE-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP1]], align 8
+; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 0, i32 6>
+; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 1, i32 7>
+; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 2, i32 8>
+; SCALABLE-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 3, i32 9>
+; SCALABLE-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 4, i32 10>
+; SCALABLE-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <2 x i32> <i32 5, i32 11>
+; SCALABLE-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; SCALABLE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; SCALABLE-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; SCALABLE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; SCALABLE-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; SCALABLE-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
+; SCALABLE-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; SCALABLE: middle.block:
+; SCALABLE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE: scalar.ph:
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE: loop:
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 6
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; SCALABLE-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; SCALABLE-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; SCALABLE-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; SCALABLE-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; SCALABLE-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
+; SCALABLE-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
+; SCALABLE-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8
+; SCALABLE-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6
+; SCALABLE-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; SCALABLE: exit:
+; SCALABLE-NEXT: ret void
+;
+entry:
+ br label %loop
+loop:
+ %i = phi i64 [0, %entry], [%nexti, %loop]
+
+ %offset0 = mul i64 %i, 6
+ %q0 = getelementptr i64, ptr %p, i64 %offset0
+ %x0 = load i64, ptr %q0
+ %y0 = add i64 %x0, 1
+ store i64 %y0, ptr %q0
+
+ %offset1 = add i64 %offset0, 1
+ %q1 = getelementptr i64, ptr %p, i64 %offset1
+ %x1 = load i64, ptr %q1
+ %y1 = add i64 %x1, 2
+ store i64 %y1, ptr %q1
+
+ %offset2 = add i64 %offset1, 1
+ %q2 = getelementptr i64, ptr %p, i64 %offset2
+ %x2 = load i64, ptr %q2
+ %y2 = add i64 %x2, 3
+ store i64 %y2, ptr %q2
+
+ %offset3 = add i64 %offset2, 1
+ %q3 = getelementptr i64, ptr %p, i64 %offset3
+ %x3 = load i64, ptr %q3
+ %y3 = add i64 %x3, 4
+ store i64 %y3, ptr %q3
+
+ %offset4 = add i64 %offset3, 1
+ %q4 = getelementptr i64, ptr %p, i64 %offset4
+ %x4 = load i64, ptr %q4
+ %y4 = add i64 %x4, 5
+ store i64 %y4, ptr %q4
+
+ %offset5 = add i64 %offset4, 1
+ %q5 = getelementptr i64, ptr %p, i64 %offset5
+ %x5 = load i64, ptr %q5
+ %y5 = add i64 %x5, 6
+ store i64 %y5, ptr %q5
+
+ %nexti = add i64 %i, 1
+ %done = icmp eq i64 %nexti, 1024
+ br i1 %done, label %exit, label %loop
+exit:
+ ret void
+}
+
+define void @load_store_factor7(ptr %p) {
+; CHECK-LABEL: @load_store_factor7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[INDEX]], 7
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP3]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <14 x i64>, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 0, i32 7>
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 1, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 2, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 3, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 4, i32 11>
+; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 5, i32 12>
+; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 6, i32 13>
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP19]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP13]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <6 x i64> [[TMP14]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP15]], <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <14 x i64> [[TMP16]], <14 x i64> poison, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+; CHECK-NEXT: store <14 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 7
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; CHECK-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; CHECK-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; CHECK-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; CHECK-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; CHECK-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; CHECK-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
+; CHECK-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
+; CHECK-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8
+; CHECK-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6
+; CHECK-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8
+; CHECK-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1
+; CHECK-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]]
+; CHECK-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8
+; CHECK-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7
+; CHECK-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+; FIXED-LABEL: @load_store_factor7(
+; FIXED-NEXT: entry:
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED: vector.ph:
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
+; FIXED: vector.body:
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 7
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <14 x i64>, ptr [[TMP1]], align 8
+; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 0, i32 7>
+; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 1, i32 8>
+; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 2, i32 9>
+; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 3, i32 10>
+; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 4, i32 11>
+; FIXED-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 5, i32 12>
+; FIXED-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 6, i32 13>
+; FIXED-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; FIXED-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; FIXED-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; FIXED-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP13]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; FIXED-NEXT: [[TMP15:%.*]] = shufflevector <6 x i64> [[TMP14]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
+; FIXED-NEXT: [[TMP16:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP15]], <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <14 x i64> [[TMP16]], <14 x i64> poison, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+; FIXED-NEXT: store <14 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; FIXED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; FIXED: middle.block:
+; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED: scalar.ph:
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED: loop:
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 7
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; FIXED-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; FIXED-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; FIXED-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; FIXED-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; FIXED-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; FIXED-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
+; FIXED-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
+; FIXED-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8
+; FIXED-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6
+; FIXED-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8
+; FIXED-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1
+; FIXED-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]]
+; FIXED-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8
+; FIXED-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7
+; FIXED-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; FIXED: exit:
+; FIXED-NEXT: ret void
+;
+; SCALABLE-LABEL: @load_store_factor7(
+; SCALABLE-NEXT: entry:
+; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE: vector.ph:
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
+; SCALABLE: vector.body:
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[INDEX]], 7
+; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP3]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <14 x i64>, ptr [[TMP4]], align 8
+; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 0, i32 7>
+; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 1, i32 8>
+; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 2, i32 9>
+; SCALABLE-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 3, i32 10>
+; SCALABLE-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 4, i32 11>
+; SCALABLE-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 5, i32 12>
+; SCALABLE-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 6, i32 13>
+; SCALABLE-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; SCALABLE-NEXT: [[TMP18:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; SCALABLE-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; SCALABLE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; SCALABLE-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; SCALABLE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; SCALABLE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP19]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; SCALABLE-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP13]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; SCALABLE-NEXT: [[TMP15:%.*]] = shufflevector <6 x i64> [[TMP14]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
+; SCALABLE-NEXT: [[TMP16:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP15]], <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <14 x i64> [[TMP16]], <14 x i64> poison, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+; SCALABLE-NEXT: store <14 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; SCALABLE: middle.block:
+; SCALABLE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE: scalar.ph:
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE: loop:
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 7
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; SCALABLE-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; SCALABLE-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; SCALABLE-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; SCALABLE-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; SCALABLE-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
+; SCALABLE-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
+; SCALABLE-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8
+; SCALABLE-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6
+; SCALABLE-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8
+; SCALABLE-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1
+; SCALABLE-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]]
+; SCALABLE-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8
+; SCALABLE-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7
+; SCALABLE-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; SCALABLE: exit:
+; SCALABLE-NEXT: ret void
+;
+entry:
+ br label %loop
+loop:
+ %i = phi i64 [0, %entry], [%nexti, %loop]
+
+ %offset0 = mul i64 %i, 7
+ %q0 = getelementptr i64, ptr %p, i64 %offset0
+ %x0 = load i64, ptr %q0
+ %y0 = add i64 %x0, 1
+ store i64 %y0, ptr %q0
+
+ %offset1 = add i64 %offset0, 1
+ %q1 = getelementptr i64, ptr %p, i64 %offset1
+ %x1 = load i64, ptr %q1
+ %y1 = add i64 %x1, 2
+ store i64 %y1, ptr %q1
+
+ %offset2 = add i64 %offset1, 1
+ %q2 = getelementptr i64, ptr %p, i64 %offset2
+ %x2 = load i64, ptr %q2
+ %y2 = add i64 %x2, 3
+ store i64 %y2, ptr %q2
+
+ %offset3 = add i64 %offset2, 1
+ %q3 = getelementptr i64, ptr %p, i64 %offset3
+ %x3 = load i64, ptr %q3
+ %y3 = add i64 %x3, 4
+ store i64 %y3, ptr %q3
+
+ %offset4 = add i64 %offset3, 1
+ %q4 = getelementptr i64, ptr %p, i64 %offset4
+ %x4 = load i64, ptr %q4
+ %y4 = add i64 %x4, 5
+ store i64 %y4, ptr %q4
+
+ %offset5 = add i64 %offset4, 1
+ %q5 = getelementptr i64, ptr %p, i64 %offset5
+ %x5 = load i64, ptr %q5
+ %y5 = add i64 %x5, 6
+ store i64 %y5, ptr %q5
+
+ %offset6 = add i64 %offset5, 1
+ %q6 = getelementptr i64, ptr %p, i64 %offset6
+ %x6 = load i64, ptr %q6
+ %y6 = add i64 %x6, 7
+ store i64 %y6, ptr %q6
+
+ %nexti = add i64 %i, 1
+ %done = icmp eq i64 %nexti, 1024
+ br i1 %done, label %exit, label %loop
+exit:
+ ret void
+}
+
define void @load_store_factor8(ptr %p) {
; CHECK-LABEL: @load_store_factor8(
; CHECK-NEXT: entry:
@@ -743,18 +1819,18 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[Q0]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP6]])
-; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP7]])
-; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 1
-; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP8]])
-; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP9]])
-; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP10]])
-; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP11]])
+; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP5]])
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP6]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 1
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP7]])
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP8]])
+; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP9]])
+; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 0
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 0
@@ -781,7 +1857,7 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: store <vscale x 8 x i64> [[INTERLEAVED_VEC12]], ptr [[Q0]], align 8
; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP2]]
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -832,7 +1908,7 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8
; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -873,7 +1949,7 @@ define void @load_store_factor8(ptr %p) {
; FIXED-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 2
; FIXED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; FIXED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
@@ -923,7 +1999,7 @@ define void @load_store_factor8(ptr %p) {
; FIXED-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8
; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP11:![0-9]+]]
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP19:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
@@ -944,18 +2020,18 @@ define void @load_store_factor8(ptr %p) {
; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[Q0]], align 8
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> [[WIDE_VEC]])
-; SCALABLE-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP6]])
-; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP7]])
-; SCALABLE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 0
-; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 0
-; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 1
-; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 1
-; SCALABLE-NEXT: [[STRIDED_VEC3:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP8]])
-; SCALABLE-NEXT: [[STRIDED_VEC4:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP9]])
-; SCALABLE-NEXT: [[STRIDED_VEC5:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP10]])
-; SCALABLE-NEXT: [[STRIDED_VEC6:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP11]])
+; SCALABLE-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP5]])
+; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[TMP6]])
+; SCALABLE-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 0
+; SCALABLE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 0
+; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC1]], 1
+; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC2]], 1
+; SCALABLE-NEXT: [[STRIDED_VEC3:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP7]])
+; SCALABLE-NEXT: [[STRIDED_VEC4:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP8]])
+; SCALABLE-NEXT: [[STRIDED_VEC5:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP9]])
+; SCALABLE-NEXT: [[STRIDED_VEC6:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP10]])
; SCALABLE-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 0
; SCALABLE-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 0
; SCALABLE-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 0
@@ -982,7 +2058,7 @@ define void @load_store_factor8(ptr %p) {
; SCALABLE-NEXT: store <vscale x 8 x i64> [[INTERLEAVED_VEC12]], ptr [[Q0]], align 8
; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP2]]
; SCALABLE-NEXT: [[TMP28:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -1033,7 +2109,7 @@ define void @load_store_factor8(ptr %p) {
; SCALABLE-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8
; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP11:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP19:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
@@ -1126,7 +2202,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP13]], align 4
; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -1146,7 +2222,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: store i32 [[RES]], ptr [[DST]], align 4
; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -1177,7 +2253,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; FIXED-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP10]], align 4
; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 16
; FIXED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; FIXED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
@@ -1196,7 +2272,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; FIXED-NEXT: store i32 [[RES]], ptr [[DST]], align 4
; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP13:![0-9]+]]
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP21:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
@@ -1228,7 +2304,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP13]], align 4
; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -1248,7 +2324,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: store i32 [[RES]], ptr [[DST]], align 4
; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP13:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP21:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
@@ -1306,7 +2382,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -1326,7 +2402,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: store i64 [[RES]], ptr [[DST]], align 8
; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -1357,7 +2433,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; FIXED-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP10]], align 8
; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 8
; FIXED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; FIXED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
@@ -1376,7 +2452,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; FIXED-NEXT: store i64 [[RES]], ptr [[DST]], align 8
; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP15:![0-9]+]]
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP23:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
@@ -1408,7 +2484,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: store <vscale x 2 x i64> [[TMP11]], ptr [[TMP13]], align 8
; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -1428,7 +2504,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: store i64 [[RES]], ptr [[DST]], align 8
; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP15:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP23:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
More information about the llvm-commits
mailing list