[llvm] 5458ea5 - [LV] Regenerate UTC variable names in RISCV/interleaved-accesses.ll. NFC
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 17:07:41 PDT 2025
Author: Luke Lau
Date: 2025-06-04T01:07:32+01:00
New Revision: 5458ea5122ed972c0a36e16a0364c8bfa0fb592a
URL: https://github.com/llvm/llvm-project/commit/5458ea5122ed972c0a36e16a0364c8bfa0fb592a
DIFF: https://github.com/llvm/llvm-project/commit/5458ea5122ed972c0a36e16a0364c8bfa0fb592a.diff
LOG: [LV] Regenerate UTC variable names in RISCV/interleaved-accesses.ll. NFC
Added:
Modified:
llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index f48691bd54417..feb3b6d42b658 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -9,7 +9,7 @@ define void @load_store_factor2_i32(ptr %p) {
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
@@ -17,86 +17,86 @@ define void @load_store_factor2_i32(ptr %p) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[Q0]], align 4
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], splat (i32 1)
-; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 2)
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32> [[TMP12]])
-; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[TMP8]], splat (i32 1)
+; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], splat (i32 2)
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP11]])
+; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; CHECK-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q2]], align 4
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
-; CHECK-NEXT: store i32 [[Y0]], ptr [[Q2]], align 4
-; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; CHECK-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2
; CHECK-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4
-; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @load_store_factor2_i32(
; FIXED-NEXT: entry:
-; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
-; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
-; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[Q0]], align 4
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = shl i64 [[INDEX]], 1
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP1]], align 4
; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
-; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
-; FIXED-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; FIXED-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 8
-; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; FIXED-NEXT: [[TMP2:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; FIXED-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; FIXED-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 4
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; FIXED-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
-; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT: br label [[LOOP1:%.*]]
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
; FIXED: loop:
-; FIXED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; FIXED-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q2]], align 4
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; FIXED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
-; FIXED-NEXT: store i32 [[Y0]], ptr [[Q2]], align 4
-; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; FIXED-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2
; FIXED-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4
-; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP3:![0-9]+]]
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
@@ -105,7 +105,7 @@ define void @load_store_factor2_i32(ptr %p) {
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
@@ -113,43 +113,43 @@ define void @load_store_factor2_i32(ptr %p) {
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[Q0]], align 4
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP7]], align 4
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], splat (i32 1)
-; SCALABLE-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 2)
-; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32> [[TMP12]])
-; SCALABLE-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; SCALABLE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[TMP8]], splat (i32 1)
+; SCALABLE-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], splat (i32 2)
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP11]])
+; SCALABLE-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
-; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT: br label [[LOOP1:%.*]]
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: loop:
-; SCALABLE-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q2]], align 4
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; SCALABLE-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
-; SCALABLE-NEXT: store i32 [[Y0]], ptr [[Q2]], align 4
-; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; SCALABLE-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2
; SCALABLE-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4
-; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP3:![0-9]+]]
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
@@ -183,7 +183,7 @@ define void @load_store_factor2_i64(ptr %p) {
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -191,86 +191,86 @@ define void @load_store_factor2_i64(ptr %p) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[Q0]], align 8
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], splat (i64 1)
-; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 2)
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP12]])
-; CHECK-NEXT: store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 2 x i64> [[TMP8]], splat (i64 1)
+; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], splat (i64 2)
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]])
+; CHECK-NEXT: store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q2]], align 8
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; CHECK-NEXT: store i64 [[Y0]], ptr [[Q2]], align 8
-; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
-; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @load_store_factor2_i64(
; FIXED-NEXT: entry:
-; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
-; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
-; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[Q0]], align 8
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = shl i64 [[INDEX]], 1
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP1]], align 8
; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; FIXED-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
-; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; FIXED-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; FIXED-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 4
-; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; FIXED-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; FIXED-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
-; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT: br label [[LOOP1:%.*]]
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
; FIXED: loop:
-; FIXED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q2]], align 8
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; FIXED-NEXT: store i64 [[Y0]], ptr [[Q2]], align 8
-; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
-; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
@@ -279,7 +279,7 @@ define void @load_store_factor2_i64(ptr %p) {
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -287,43 +287,43 @@ define void @load_store_factor2_i64(ptr %p) {
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP7]], align 8
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], splat (i64 1)
-; SCALABLE-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 2)
-; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP12]])
-; SCALABLE-NEXT: store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; SCALABLE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT: [[TMP10:%.*]] = add <vscale x 2 x i64> [[TMP8]], splat (i64 1)
+; SCALABLE-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], splat (i64 2)
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]])
+; SCALABLE-NEXT: store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
-; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT: br label [[LOOP1:%.*]]
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: loop:
-; SCALABLE-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q2]], align 8
-; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
-; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
@@ -354,14 +354,14 @@ exit:
define void @load_store_factor3_i32(ptr %p) {
; CHECK-LABEL: @load_store_factor3_i32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
-; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[Q0]], align 4
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
@@ -372,23 +372,23 @@ define void @load_store_factor3_i32(ptr %p) {
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP7]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT: [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; CHECK-NEXT: [[Q3:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET3]]
-; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q3]], align 4
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
-; CHECK-NEXT: store i32 [[Y0]], ptr [[Q3]], align 4
-; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; CHECK-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2
@@ -398,49 +398,49 @@ define void @load_store_factor3_i32(ptr %p) {
; CHECK-NEXT: [[X2:%.*]] = load i32, ptr [[Q2]], align 4
; CHECK-NEXT: [[Y2:%.*]] = add i32 [[X2]], 3
; CHECK-NEXT: store i32 [[Y2]], ptr [[Q2]], align 4
-; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @load_store_factor3_i32(
; FIXED-NEXT: entry:
-; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
-; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
-; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
-; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[Q0]], align 4
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP1]], align 4
; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; FIXED-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
-; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
-; FIXED-NEXT: [[TMP5:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
-; FIXED-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; FIXED-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; FIXED-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP8]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; FIXED-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 8
-; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; FIXED-NEXT: [[TMP2:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
+; FIXED-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; FIXED-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; FIXED-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP7]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; FIXED-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 4
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
-; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT: br label [[LOOP1:%.*]]
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
; FIXED: loop:
-; FIXED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT: [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; FIXED-NEXT: [[Q3:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET3]]
-; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q3]], align 4
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; FIXED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
-; FIXED-NEXT: store i32 [[Y0]], ptr [[Q3]], align 4
-; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; FIXED-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; FIXED-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2
@@ -450,22 +450,22 @@ define void @load_store_factor3_i32(ptr %p) {
; FIXED-NEXT: [[X2:%.*]] = load i32, ptr [[Q2]], align 4
; FIXED-NEXT: [[Y2:%.*]] = add i32 [[X2]], 3
; FIXED-NEXT: store i32 [[Y2]], ptr [[Q2]], align 4
-; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP7:![0-9]+]]
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
; SCALABLE-LABEL: @load_store_factor3_i32(
; SCALABLE-NEXT: entry:
-; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
-; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
-; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[Q0]], align 4
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
+; SCALABLE-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP0]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP1]], align 4
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
@@ -476,23 +476,23 @@ define void @load_store_factor3_i32(ptr %p) {
; SCALABLE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SCALABLE-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP7]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; SCALABLE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[Q0]], align 4
-; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], 8
-; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; SCALABLE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 4
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
-; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT: br label [[LOOP1:%.*]]
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: loop:
-; SCALABLE-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT: [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET3]]
-; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q3]], align 4
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; SCALABLE-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
-; SCALABLE-NEXT: store i32 [[Y0]], ptr [[Q3]], align 4
-; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; SCALABLE-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; SCALABLE-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2
@@ -502,9 +502,9 @@ define void @load_store_factor3_i32(ptr %p) {
; SCALABLE-NEXT: [[X2:%.*]] = load i32, ptr [[Q2]], align 4
; SCALABLE-NEXT: [[Y2:%.*]] = add i32 [[X2]], 3
; SCALABLE-NEXT: store i32 [[Y2]], ptr [[Q2]], align 4
-; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP7:![0-9]+]]
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
@@ -541,14 +541,14 @@ exit:
define void @load_store_factor3_i64(ptr %p) {
; CHECK-LABEL: @load_store_factor3_i64(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
-; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[Q0]], align 8
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
@@ -559,23 +559,23 @@ define void @load_store_factor3_i64(ptr %p) {
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> [[TMP6]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP7]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT: [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
-; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q3]], align 8
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; CHECK-NEXT: store i64 [[Y0]], ptr [[Q3]], align 8
-; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
@@ -585,49 +585,49 @@ define void @load_store_factor3_i64(ptr %p) {
; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
-; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @load_store_factor3_i64(
; FIXED-NEXT: entry:
-; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
-; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
-; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
-; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[Q0]], align 8
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP1]], align 8
; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; FIXED-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
-; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; FIXED-NEXT: [[TMP5:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
-; FIXED-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; FIXED-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; FIXED-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP8]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; FIXED-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 4
-; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; FIXED-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; FIXED-NEXT: [[TMP7:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> [[TMP6]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP7]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; FIXED-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
-; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT: br label [[LOOP1:%.*]]
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
; FIXED: loop:
-; FIXED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT: [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
-; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q3]], align 8
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; FIXED-NEXT: store i64 [[Y0]], ptr [[Q3]], align 8
-; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
@@ -637,22 +637,22 @@ define void @load_store_factor3_i64(ptr %p) {
; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
-; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP9:![0-9]+]]
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
; SCALABLE-LABEL: @load_store_factor3_i64(
; SCALABLE-NEXT: entry:
-; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
-; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
-; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
+; SCALABLE-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP1]], align 8
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
@@ -663,23 +663,23 @@ define void @load_store_factor3_i64(ptr %p) {
; SCALABLE-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SCALABLE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> [[TMP6]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP7]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; SCALABLE-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], 4
-; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; SCALABLE-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
-; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT: br label [[LOOP1:%.*]]
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: loop:
-; SCALABLE-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT: [[OFFSET3:%.*]] = mul i64 [[I1]], 3
-; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
-; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q3]], align 8
-; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET3]], 1
+; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
@@ -689,9 +689,9 @@ define void @load_store_factor3_i64(ptr %p) {
; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
-; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP9:![0-9]+]]
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
@@ -1499,9 +1499,9 @@ define void @load_store_factor7(ptr %p) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[INDEX]], 7
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP3]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <14 x i64>, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 7
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <14 x i64>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 0, i32 7>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 1, i32 8>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 2, i32 9>
@@ -1510,14 +1510,14 @@ define void @load_store_factor7(ptr %p) {
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 5, i32 12>
; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 6, i32 13>
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
-; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP19]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
@@ -1525,7 +1525,7 @@ define void @load_store_factor7(ptr %p) {
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <6 x i64> [[TMP14]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP15]], <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <14 x i64> [[TMP16]], <14 x i64> poison, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
-; CHECK-NEXT: store <14 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8
+; CHECK-NEXT: store <14 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -1669,9 +1669,9 @@ define void @load_store_factor7(ptr %p) {
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[INDEX]], 7
-; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP3]]
-; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <14 x i64>, ptr [[TMP4]], align 8
+; SCALABLE-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 7
+; SCALABLE-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <14 x i64>, ptr [[TMP1]], align 8
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 0, i32 7>
; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 1, i32 8>
; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 2, i32 9>
@@ -1680,14 +1680,14 @@ define void @load_store_factor7(ptr %p) {
; SCALABLE-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 5, i32 12>
; SCALABLE-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <14 x i64> [[WIDE_VEC]], <14 x i64> poison, <2 x i32> <i32 6, i32 13>
; SCALABLE-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
-; SCALABLE-NEXT: [[TMP18:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; SCALABLE-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; SCALABLE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
; SCALABLE-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
; SCALABLE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
; SCALABLE-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
; SCALABLE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
-; SCALABLE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SCALABLE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP19]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SCALABLE-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
@@ -1695,7 +1695,7 @@ define void @load_store_factor7(ptr %p) {
; SCALABLE-NEXT: [[TMP15:%.*]] = shufflevector <6 x i64> [[TMP14]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
; SCALABLE-NEXT: [[TMP16:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP15]], <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <14 x i64> [[TMP16]], <14 x i64> poison, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
-; SCALABLE-NEXT: store <14 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8
+; SCALABLE-NEXT: store <14 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -1806,18 +1806,18 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3
-; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[Q0]], align 8
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP3]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> [[WIDE_VEC]])
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
@@ -1831,47 +1831,47 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP8]])
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP9]])
; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP10]])
-; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 1
-; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 1
-; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 1
-; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 1 x i64> [[TMP12]], splat (i64 1)
-; CHECK-NEXT: [[TMP21:%.*]] = add <vscale x 1 x i64> [[TMP13]], splat (i64 2)
-; CHECK-NEXT: [[TMP22:%.*]] = add <vscale x 1 x i64> [[TMP14]], splat (i64 3)
-; CHECK-NEXT: [[TMP23:%.*]] = add <vscale x 1 x i64> [[TMP15]], splat (i64 4)
-; CHECK-NEXT: [[TMP24:%.*]] = add <vscale x 1 x i64> [[TMP16]], splat (i64 5)
-; CHECK-NEXT: [[TMP25:%.*]] = add <vscale x 1 x i64> [[TMP17]], splat (i64 6)
-; CHECK-NEXT: [[TMP26:%.*]] = add <vscale x 1 x i64> [[TMP18]], splat (i64 7)
-; CHECK-NEXT: [[TMP27:%.*]] = add <vscale x 1 x i64> [[TMP19]], splat (i64 8)
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP20]], <vscale x 1 x i64> [[TMP24]])
-; CHECK-NEXT: [[INTERLEAVED_VEC7:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP21]], <vscale x 1 x i64> [[TMP25]])
-; CHECK-NEXT: [[INTERLEAVED_VEC8:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP22]], <vscale x 1 x i64> [[TMP26]])
-; CHECK-NEXT: [[INTERLEAVED_VEC9:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP23]], <vscale x 1 x i64> [[TMP27]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 1
+; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 1
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 1 x i64> [[TMP11]], splat (i64 1)
+; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 1 x i64> [[TMP12]], splat (i64 2)
+; CHECK-NEXT: [[TMP21:%.*]] = add <vscale x 1 x i64> [[TMP13]], splat (i64 3)
+; CHECK-NEXT: [[TMP22:%.*]] = add <vscale x 1 x i64> [[TMP14]], splat (i64 4)
+; CHECK-NEXT: [[TMP23:%.*]] = add <vscale x 1 x i64> [[TMP15]], splat (i64 5)
+; CHECK-NEXT: [[TMP24:%.*]] = add <vscale x 1 x i64> [[TMP16]], splat (i64 6)
+; CHECK-NEXT: [[TMP25:%.*]] = add <vscale x 1 x i64> [[TMP17]], splat (i64 7)
+; CHECK-NEXT: [[TMP26:%.*]] = add <vscale x 1 x i64> [[TMP18]], splat (i64 8)
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP19]], <vscale x 1 x i64> [[TMP23]])
+; CHECK-NEXT: [[INTERLEAVED_VEC7:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP20]], <vscale x 1 x i64> [[TMP24]])
+; CHECK-NEXT: [[INTERLEAVED_VEC8:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP21]], <vscale x 1 x i64> [[TMP25]])
+; CHECK-NEXT: [[INTERLEAVED_VEC9:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP22]], <vscale x 1 x i64> [[TMP26]])
; CHECK-NEXT: [[INTERLEAVED_VEC10:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[INTERLEAVED_VEC]], <vscale x 2 x i64> [[INTERLEAVED_VEC8]])
; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[INTERLEAVED_VEC7]], <vscale x 2 x i64> [[INTERLEAVED_VEC9]])
; CHECK-NEXT: [[INTERLEAVED_VEC12:%.*]] = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> [[INTERLEAVED_VEC10]], <vscale x 4 x i64> [[INTERLEAVED_VEC11]])
-; CHECK-NEXT: store <vscale x 8 x i64> [[INTERLEAVED_VEC12]], ptr [[Q0]], align 8
-; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP2]]
-; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: store <vscale x 8 x i64> [[INTERLEAVED_VEC12]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT: [[OFFSET8:%.*]] = shl i64 [[I1]], 3
-; CHECK-NEXT: [[Q8:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET8]]
-; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q8]], align 8
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; CHECK-NEXT: store i64 [[Y0]], ptr [[Q8]], align 8
-; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET8]], 1
+; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
@@ -1906,22 +1906,22 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: [[X7:%.*]] = load i64, ptr [[Q7]], align 8
; CHECK-NEXT: [[Y7:%.*]] = add i64 [[X7]], 8
; CHECK-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8
-; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @load_store_factor8(
; FIXED-NEXT: entry:
-; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
-; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
-; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3
-; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[Q0]], align 8
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = shl i64 [[INDEX]], 3
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP1]], align 8
; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 0, i32 8>
; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 1, i32 9>
; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 2, i32 10>
@@ -1930,39 +1930,39 @@ define void @load_store_factor8(ptr %p) {
; FIXED-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
; FIXED-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
; FIXED-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
-; FIXED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
-; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
-; FIXED-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
-; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
-; FIXED-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
-; FIXED-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
-; FIXED-NEXT: [[TMP9:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
-; FIXED-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
-; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; FIXED-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; FIXED-NEXT: [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; FIXED-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; FIXED-NEXT: [[TMP17:%.*]] = shufflevector <8 x i64> [[TMP15]], <8 x i64> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP17]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[Q0]], align 8
-; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 2
-; FIXED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]]
+; FIXED-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT: [[TMP5:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; FIXED-NEXT: [[TMP7:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; FIXED-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; FIXED-NEXT: [[TMP9:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
+; FIXED-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT: [[TMP16:%.*]] = shufflevector <8 x i64> [[TMP14]], <8 x i64> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP16]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; FIXED-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; FIXED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
-; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT: br label [[LOOP1:%.*]]
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
; FIXED: loop:
-; FIXED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT: [[OFFSET8:%.*]] = shl i64 [[I1]], 3
-; FIXED-NEXT: [[Q8:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET8]]
-; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q8]], align 8
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; FIXED-NEXT: store i64 [[Y0]], ptr [[Q8]], align 8
-; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET8]], 1
+; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
@@ -1997,9 +1997,9 @@ define void @load_store_factor8(ptr %p) {
; FIXED-NEXT: [[X7:%.*]] = load i64, ptr [[Q7]], align 8
; FIXED-NEXT: [[Y7:%.*]] = add i64 [[X7]], 8
; FIXED-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8
-; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP19:![0-9]+]]
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
@@ -2007,18 +2007,18 @@ define void @load_store_factor8(ptr %p) {
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
-; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3
-; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP3:%.*]] = shl i64 [[INDEX]], 3
+; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP3]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i64>, ptr [[TMP4]], align 8
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> [[WIDE_VEC]])
; SCALABLE-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 0
; SCALABLE-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } [[STRIDED_VEC]], 1
@@ -2032,47 +2032,47 @@ define void @load_store_factor8(ptr %p) {
; SCALABLE-NEXT: [[STRIDED_VEC4:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP8]])
; SCALABLE-NEXT: [[STRIDED_VEC5:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP9]])
; SCALABLE-NEXT: [[STRIDED_VEC6:%.*]] = call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> [[TMP10]])
-; SCALABLE-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 0
-; SCALABLE-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 0
-; SCALABLE-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 0
-; SCALABLE-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 0
-; SCALABLE-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 1
-; SCALABLE-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 1
-; SCALABLE-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 1
-; SCALABLE-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 1
-; SCALABLE-NEXT: [[TMP20:%.*]] = add <vscale x 1 x i64> [[TMP12]], splat (i64 1)
-; SCALABLE-NEXT: [[TMP21:%.*]] = add <vscale x 1 x i64> [[TMP13]], splat (i64 2)
-; SCALABLE-NEXT: [[TMP22:%.*]] = add <vscale x 1 x i64> [[TMP14]], splat (i64 3)
-; SCALABLE-NEXT: [[TMP23:%.*]] = add <vscale x 1 x i64> [[TMP15]], splat (i64 4)
-; SCALABLE-NEXT: [[TMP24:%.*]] = add <vscale x 1 x i64> [[TMP16]], splat (i64 5)
-; SCALABLE-NEXT: [[TMP25:%.*]] = add <vscale x 1 x i64> [[TMP17]], splat (i64 6)
-; SCALABLE-NEXT: [[TMP26:%.*]] = add <vscale x 1 x i64> [[TMP18]], splat (i64 7)
-; SCALABLE-NEXT: [[TMP27:%.*]] = add <vscale x 1 x i64> [[TMP19]], splat (i64 8)
-; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP20]], <vscale x 1 x i64> [[TMP24]])
-; SCALABLE-NEXT: [[INTERLEAVED_VEC7:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP21]], <vscale x 1 x i64> [[TMP25]])
-; SCALABLE-NEXT: [[INTERLEAVED_VEC8:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP22]], <vscale x 1 x i64> [[TMP26]])
-; SCALABLE-NEXT: [[INTERLEAVED_VEC9:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP23]], <vscale x 1 x i64> [[TMP27]])
+; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 0
+; SCALABLE-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 0
+; SCALABLE-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 0
+; SCALABLE-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 0
+; SCALABLE-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC3]], 1
+; SCALABLE-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC4]], 1
+; SCALABLE-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC5]], 1
+; SCALABLE-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } [[STRIDED_VEC6]], 1
+; SCALABLE-NEXT: [[TMP19:%.*]] = add <vscale x 1 x i64> [[TMP11]], splat (i64 1)
+; SCALABLE-NEXT: [[TMP20:%.*]] = add <vscale x 1 x i64> [[TMP12]], splat (i64 2)
+; SCALABLE-NEXT: [[TMP21:%.*]] = add <vscale x 1 x i64> [[TMP13]], splat (i64 3)
+; SCALABLE-NEXT: [[TMP22:%.*]] = add <vscale x 1 x i64> [[TMP14]], splat (i64 4)
+; SCALABLE-NEXT: [[TMP23:%.*]] = add <vscale x 1 x i64> [[TMP15]], splat (i64 5)
+; SCALABLE-NEXT: [[TMP24:%.*]] = add <vscale x 1 x i64> [[TMP16]], splat (i64 6)
+; SCALABLE-NEXT: [[TMP25:%.*]] = add <vscale x 1 x i64> [[TMP17]], splat (i64 7)
+; SCALABLE-NEXT: [[TMP26:%.*]] = add <vscale x 1 x i64> [[TMP18]], splat (i64 8)
+; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP19]], <vscale x 1 x i64> [[TMP23]])
+; SCALABLE-NEXT: [[INTERLEAVED_VEC7:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP20]], <vscale x 1 x i64> [[TMP24]])
+; SCALABLE-NEXT: [[INTERLEAVED_VEC8:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP21]], <vscale x 1 x i64> [[TMP25]])
+; SCALABLE-NEXT: [[INTERLEAVED_VEC9:%.*]] = call <vscale x 2 x i64> @llvm.vector.interleave2.nxv2i64(<vscale x 1 x i64> [[TMP22]], <vscale x 1 x i64> [[TMP26]])
; SCALABLE-NEXT: [[INTERLEAVED_VEC10:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[INTERLEAVED_VEC]], <vscale x 2 x i64> [[INTERLEAVED_VEC8]])
; SCALABLE-NEXT: [[INTERLEAVED_VEC11:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[INTERLEAVED_VEC7]], <vscale x 2 x i64> [[INTERLEAVED_VEC9]])
; SCALABLE-NEXT: [[INTERLEAVED_VEC12:%.*]] = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> [[INTERLEAVED_VEC10]], <vscale x 4 x i64> [[INTERLEAVED_VEC11]])
-; SCALABLE-NEXT: store <vscale x 8 x i64> [[INTERLEAVED_VEC12]], ptr [[Q0]], align 8
-; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP2]]
-; SCALABLE-NEXT: [[TMP28:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]]
+; SCALABLE-NEXT: store <vscale x 8 x i64> [[INTERLEAVED_VEC12]], ptr [[TMP4]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
+; SCALABLE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
-; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT: br label [[LOOP1:%.*]]
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: loop:
-; SCALABLE-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT: [[OFFSET8:%.*]] = shl i64 [[I1]], 3
-; SCALABLE-NEXT: [[Q8:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET8]]
-; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q8]], align 8
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
-; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q8]], align 8
-; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET8]], 1
+; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
@@ -2107,9 +2107,9 @@ define void @load_store_factor8(ptr %p) {
; SCALABLE-NEXT: [[X7:%.*]] = load i64, ptr [[Q7]], align 8
; SCALABLE-NEXT: [[Y7:%.*]] = add i64 [[X7]], 8
; SCALABLE-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8
-; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP19:![0-9]+]]
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
@@ -2179,7 +2179,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
@@ -2187,92 +2187,92 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[Q0]], align 4
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[I]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP13]], align 4
-; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP12]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; CHECK-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q2]], align 4
-; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; CHECK-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]]
-; CHECK-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I1]]
+; CHECK-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
; CHECK-NEXT: store i32 [[RES]], ptr [[DST]], align 4
-; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @combine_load_factor2_i32(
; FIXED-NEXT: entry:
-; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
-; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
-; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[I]], 8
-; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; FIXED-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
-; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]]
-; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[Q0]], align 4
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 8
+; FIXED-NEXT: [[TMP1:%.*]] = shl i64 [[INDEX]], 1
+; FIXED-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP2]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4
; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT: [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
+; FIXED-NEXT: [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4
; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]]
-; FIXED-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC4]]
-; FIXED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[I]]
-; FIXED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
-; FIXED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 8
+; FIXED-NEXT: [[TMP5:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]]
+; FIXED-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC4]]
+; FIXED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[INDEX]]
+; FIXED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0
+; FIXED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP7]], i32 8
+; FIXED-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP8]], align 4
; FIXED-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP9]], align 4
-; FIXED-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP10]], align 4
-; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 16
-; FIXED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
-; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT: br label [[LOOP1:%.*]]
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
; FIXED: loop:
-; FIXED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; FIXED-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q2]], align 4
-; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; FIXED-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]]
-; FIXED-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I1]]
+; FIXED-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
; FIXED-NEXT: store i32 [[RES]], ptr [[DST]], align 4
-; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP21:![0-9]+]]
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
@@ -2281,7 +2281,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
@@ -2289,42 +2289,42 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[Q0]], align 4
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP7]], align 4
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[TMP9]], [[TMP10]]
-; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[I]]
-; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
-; SCALABLE-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP13]], align 4
-; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
+; SCALABLE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[TMP9]]
+; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[INDEX]]
+; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
+; SCALABLE-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP12]], align 4
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
-; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT: br label [[LOOP1:%.*]]
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: loop:
-; SCALABLE-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
-; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q2]], align 4
-; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; SCALABLE-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]]
-; SCALABLE-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I1]]
+; SCALABLE-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
; SCALABLE-NEXT: store i32 [[RES]], ptr [[DST]], align 4
-; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP21:![0-9]+]]
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
@@ -2359,7 +2359,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -2367,92 +2367,92 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[Q0]], align 8
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[I]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0
-; CHECK-NEXT: store <vscale x 2 x i64> [[TMP11]], ptr [[TMP13]], align 8
-; CHECK-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 2 x i64> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
+; CHECK-NEXT: store <vscale x 2 x i64> [[TMP10]], ptr [[TMP12]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; CHECK-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q2]], align 8
-; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; CHECK-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]]
-; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I1]]
+; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
; CHECK-NEXT: store i64 [[RES]], ptr [[DST]], align 8
-; CHECK-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @combine_load_factor2_i64(
; FIXED-NEXT: entry:
-; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
-; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
-; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[I]], 4
-; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; FIXED-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
-; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; FIXED-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
-; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[Q0]], align 8
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 4
+; FIXED-NEXT: [[TMP1:%.*]] = shl i64 [[INDEX]], 1
+; FIXED-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP2]]
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 8
; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; FIXED-NEXT: [[WIDE_VEC2:%.*]] = load <8 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT: [[WIDE_VEC2:%.*]] = load <8 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i64> [[WIDE_VEC2]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i64> [[WIDE_VEC2]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; FIXED-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC1]]
-; FIXED-NEXT: [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC3]], [[STRIDED_VEC4]]
-; FIXED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[I]]
-; FIXED-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
-; FIXED-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP8]], i32 4
+; FIXED-NEXT: [[TMP5:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC1]]
+; FIXED-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC3]], [[STRIDED_VEC4]]
+; FIXED-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[INDEX]]
+; FIXED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
+; FIXED-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP7]], i32 4
+; FIXED-NEXT: store <4 x i64> [[TMP5]], ptr [[TMP8]], align 8
; FIXED-NEXT: store <4 x i64> [[TMP6]], ptr [[TMP9]], align 8
-; FIXED-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP10]], align 8
-; FIXED-NEXT: [[NEXTI]] = add nuw i64 [[I]], 8
-; FIXED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; FIXED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
-; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; FIXED-NEXT: br label [[LOOP1:%.*]]
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
; FIXED: loop:
-; FIXED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; FIXED-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q2]], align 8
-; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; FIXED-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]]
-; FIXED-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I1]]
+; FIXED-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
; FIXED-NEXT: store i64 [[RES]], ptr [[DST]], align 8
-; FIXED-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP23:![0-9]+]]
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
; FIXED: exit:
; FIXED-NEXT: ret void
;
@@ -2461,7 +2461,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -2469,42 +2469,42 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
-; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]]
-; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP7]], align 8
; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; SCALABLE-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP9]], [[TMP10]]
-; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[I]]
-; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0
-; SCALABLE-NEXT: store <vscale x 2 x i64> [[TMP11]], ptr [[TMP13]], align 8
-; SCALABLE-NEXT: [[NEXTI]] = add nuw i64 [[I]], [[TMP5]]
-; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[NEXTI]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
+; SCALABLE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT: [[TMP10:%.*]] = add <vscale x 2 x i64> [[TMP8]], [[TMP9]]
+; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[INDEX]]
+; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
+; SCALABLE-NEXT: store <vscale x 2 x i64> [[TMP10]], ptr [[TMP12]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
-; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
-; SCALABLE-NEXT: br label [[LOOP1:%.*]]
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: loop:
-; SCALABLE-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI1:%.*]], [[LOOP1]] ]
-; SCALABLE-NEXT: [[OFFSET2:%.*]] = shl i64 [[I1]], 1
-; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
-; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q2]], align 8
-; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; SCALABLE-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]]
-; SCALABLE-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I1]]
+; SCALABLE-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
; SCALABLE-NEXT: store i64 [[RES]], ptr [[DST]], align 8
-; SCALABLE-NEXT: [[NEXTI1]] = add i64 [[I1]], 1
-; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI1]], 1024
-; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP23:![0-9]+]]
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
; SCALABLE: exit:
; SCALABLE-NEXT: ret void
;
More information about the llvm-commits
mailing list