[llvm] [LV] Optimize VPWidenIntOrFpInductionRecipe for known TC (PR #118828)

Hari Limaye via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 17 06:25:38 PST 2025


https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/118828

>From 276e940f2ccb01766ea9167576827bc374767baf Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 5 Dec 2024 16:14:46 +0000
Subject: [PATCH 1/2] [LV] Pre-commit tests for optimizing induction variable
 width

---
 ...folding-optimize-vector-induction-width.ll | 898 ++++++++++++++++++
 1 file changed, 898 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll

diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
new file mode 100644
index 0000000000000..252d6b063eee9
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
@@ -0,0 +1,898 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+define dso_local void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_small_tc_i8(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 14)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 15
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; test input: scalar loop that stores i16 1 to p[0..14]
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, steps by 1
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2 ; p[iv] = 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 15 ; exit after 15 iterations (iv = 0..14)
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_upper_limit_i8(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 254)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 256, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 255
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; test input: scalar loop that stores i16 1 to p[0..254]
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, steps by 1
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2 ; p[iv] = 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 255 ; exit after 255 iterations (iv = 0..254); 255 is the largest unsigned 8-bit value
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_lower_limit_i16(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 256)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 264
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 264, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 257
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; test input: scalar loop that stores i16 1 to p[0..256]
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, steps by 1
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2 ; p[iv] = 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 257 ; exit after 257 iterations (iv = 0..256); 256 is the smallest value that needs more than 8 bits
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_upper_limit_i16(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 65534)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 65536, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 65535
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; test input: scalar loop that stores i16 1 to p[0..65534]
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, steps by 1
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2 ; p[iv] = 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 65535 ; exit after 65535 iterations (iv = 0..65534); 65535 is the largest unsigned 16-bit value
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_lower_limit_i32(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 65536)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65544
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 65544, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 65537
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; test input: scalar loop that stores i16 1 to p[0..65536]
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, steps by 1
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2 ; p[iv] = 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 65537 ; exit after 65537 iterations (iv = 0..65536); 65536 is the smallest value that needs more than 16 bits
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_upper_limit_i32(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 4294967294)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967296
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967296, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 4294967295
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 4294967295
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_lower_limit_i64(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 4294967296)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967304
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967304, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 4294967297
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 4294967297
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_upper_limit_i64(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 -2)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], -1
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 18446744073709551615
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
+; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
+; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
+; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
+;.

>From fec03843da8ea90769f02a66518536fcf4cdb926 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 5 Dec 2024 16:22:15 +0000
Subject: [PATCH 2/2] [LV] Optimize VPWidenIntOrFpInductionRecipe for known TC

Optimize the IR generated for a VPWidenIntOrFpInductionRecipe to use the
narrowest type necessary, when the trip-count of a loop is known to be
constant and the only use of the recipe is the header-mask comparison
against the backedge-taken count.
---
 llvm/lib/Transforms/Vectorize/VPlan.h         |    3 +
 .../Transforms/Vectorize/VPlanTransforms.cpp  |   99 +-
 .../Transforms/Vectorize/VPlanTransforms.h    |    7 +
 .../AArch64/conditional-branches-cost.ll      |   18 +-
 .../predicated-first-order-recurrence.ll      |    6 +-
 .../X86/consecutive-ptr-uniforms.ll           |    6 +-
 .../pr45679-fold-tail-by-masking.ll           |   12 +-
 .../LoopVectorize/reduction-inloop-pred.ll    |   90 +-
 .../LoopVectorize/reduction-predselect.ll     |   54 +-
 .../tail-folding-alloca-in-loop.ll            |    6 +-
 ...timize-vector-induction-width-unrolling.ll | 4162 +++++++++++++++++
 ...folding-optimize-vector-induction-width.ll |  200 +-
 .../LoopVectorize/tail-folding-switch.ll      |    6 +-
 13 files changed, 4515 insertions(+), 154 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width-unrolling.ll

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fbbc466f2f7f6..c9adb6c975202 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1843,6 +1843,9 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
              VPSlotTracker &SlotTracker) const override;
 #endif
 
+  /// Replace the step operand (operand 1) of the recipe with \p V.
+  void setStepValue(VPValue *V) { setOperand(1, V); }
+
   VPValue *getVFValue() { return getOperand(2); }
   const VPValue *getVFValue() const { return getOperand(2); }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6c917e4eef655..1b81bee36a618 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -20,6 +20,7 @@
 #include "VPlanPatternMatch.h"
 #include "VPlanUtils.h"
 #include "VPlanVerifier.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -29,6 +30,8 @@
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/TypeSize.h"
 
 using namespace llvm;
 
@@ -975,11 +978,74 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
   }
 }
 
-void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
-                                         unsigned BestUF,
-                                         PredicatedScalarEvolution &PSE) {
-  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
-  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+/// Optimize the width of vector induction variables in \p Plan based on a known
+/// constant Trip Count, \p BestVF and \p BestUF. Returns true on any change.
+static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
+                                                     ElementCount BestVF,
+                                                     unsigned BestUF) {
+  // Only proceed if we have not completely removed the vector region.
+  if (!Plan.getVectorLoopRegion())
+    return false;
+
+  auto *TC = dyn_cast_if_present<ConstantInt>(
+      Plan.getTripCount()->getUnderlyingValue());
+  if (!TC || !BestVF.isFixed())
+    return false;
+
+  // Min power-of-2 width (>= 8) for values below TC rounded up to VF * UF.
+  auto ComputeBitWidth = [](APInt TC, uint64_t Align) {
+    auto AlignedTC =
+        Align * APIntOps::RoundingUDiv(TC, APInt(TC.getBitWidth(), Align),
+                                       APInt::Rounding::UP);
+    auto MaxVal = AlignedTC - 1;
+    return std::max<unsigned>(PowerOf2Ceil(MaxVal.getActiveBits()), 8);
+  };
+  unsigned NewBitWidth =
+      ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF);
+
+  LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();
+  auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth);
+
+  bool MadeChange = false;
+
+  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+    auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+    if (!WideIV || !WideIV->isCanonical() ||
+        WideIV->hasMoreThanOneUniqueUser() ||
+        NewIVTy == WideIV->getScalarType())
+      continue;
+
+    // Currently only handle cases where the single user is a header-mask
+    // comparison with the backedge-taken-count.
+    using namespace VPlanPatternMatch;
+    if (!match(*WideIV->user_begin(),
+               m_Binary<Instruction::ICmp>(
+                   m_Specific(WideIV),
+                   m_Specific(Plan.getOrCreateBackedgeTakenCount()))))
+      continue;
+
+    // Update IV operands and comparison bound to use new narrower type.
+    auto *NewStart = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0));
+    WideIV->setStartValue(NewStart);
+    auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
+    WideIV->setStepValue(NewStep);
+
+    auto *NewBTC = new VPWidenCastRecipe(
+        Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy);
+    Plan.getVectorPreheader()->appendRecipe(NewBTC);
+    auto *Cmp = cast<VPInstruction>(*WideIV->user_begin());
+    Cmp->setOperand(1, NewBTC);
+
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+bool VPlanTransforms::simplifyBranchConditionForVFAndUF(
+    VPlan &Plan, ElementCount BestVF, unsigned BestUF,
+    PredicatedScalarEvolution &PSE) {
   VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *ExitingVPBB = VectorRegion->getExitingBasicBlock();
   auto *Term = &ExitingVPBB->back();
@@ -992,7 +1058,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   if (!match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
       !match(Term,
              m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
-    return;
+    return false;
 
   ScalarEvolution &SE = *PSE.getSE();
   const SCEV *TripCount =
@@ -1003,7 +1069,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
   if (TripCount->isZero() ||
       !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
-    return;
+    return false;
 
   // The vector loop region only executes once. If possible, completely remove
   // the region, otherwise replace the terminator controlling the latch with
@@ -1042,8 +1108,23 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
 
   Term->eraseFromParent();
 
-  Plan.setVF(BestVF);
-  Plan.setUF(BestUF);
+  return true;
+}
+
+void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
+                                         unsigned BestUF,
+                                         PredicatedScalarEvolution &PSE) {
+  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
+  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+
+  bool MadeChange =
+      simplifyBranchConditionForVFAndUF(Plan, BestVF, BestUF, PSE);
+  MadeChange |= optimizeVectorInductionWidthForTCAndVFUF(Plan, BestVF, BestUF);
+
+  if (MadeChange) {
+    Plan.setVF(BestVF);
+    Plan.setUF(BestUF);
+  }
   // TODO: Further simplifications are possible
   //      1. Replace inductions with constants.
   //      2. Replace vector loop region with VPBasicBlock.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 3dd476a8526d6..0ff623dbd5aa8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -82,6 +82,13 @@ struct VPlanTransforms {
                                  unsigned BestUF,
                                  PredicatedScalarEvolution &PSE);
 
+  /// Try to simplify the branch condition of \p Plan. This may restrict the
+  /// resulting plan to \p BestVF and \p BestUF.
+  static bool simplifyBranchConditionForVFAndUF(VPlan &Plan,
+                                                ElementCount BestVF,
+                                                unsigned BestUF,
+                                                PredicatedScalarEvolution &PSE);
+
   /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
   /// optimizations, dead recipe removal, replicate region optimizations and
   /// block merging.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 754b86ab2fb87..ec23b7fd22ed6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -389,8 +389,8 @@ define void @latch_branch_cost(ptr %dst) {
 ; PRED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; PRED:       vector.body:
 ; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; PRED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 99)
+; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; PRED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 99)
 ; PRED-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; PRED-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; PRED:       pred.store.if:
@@ -456,7 +456,7 @@ define void @latch_branch_cost(ptr %dst) {
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; PRED:       pred.store.continue14:
 ; PRED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; PRED-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104
 ; PRED-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; PRED:       middle.block:
@@ -903,9 +903,9 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; DEFAULT:       vector.body:
 ; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
-; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
+; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6)
+; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6)
 ; DEFAULT-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
 ; DEFAULT-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DEFAULT:       pred.store.if:
@@ -978,7 +978,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; DEFAULT-NEXT:    store i8 [[TMP33]], ptr [[TMP32]], align 1
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE14]]
 ; DEFAULT:       pred.store.continue14:
-; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; DEFAULT-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; DEFAULT:       middle.block:
@@ -1005,9 +1005,9 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; PRED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; PRED:       vector.body:
 ; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
-; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
+; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
 ; PRED-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; PRED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6)
+; PRED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6)
 ; PRED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
 ; PRED-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; PRED:       pred.store.if:
@@ -1080,7 +1080,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; PRED-NEXT:    store i8 [[TMP33]], ptr [[TMP32]], align 1
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE14]]
 ; PRED:       pred.store.continue14:
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; PRED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; PRED-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; PRED:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
index 7b0fa644ea001..3ea4f1aea51d1 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
@@ -19,10 +19,10 @@ define void @func_21() {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
 ; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -59,7 +59,7 @@ define void @func_21() {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index e685a83d9ccbb..e26221eeefd2d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -86,8 +86,8 @@ attributes #0 = { "target-cpu"="knl" }
 ; FORCE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FORCE:       vector.body:
 ; FORCE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
-; FORCE-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
-; FORCE-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 2)
+; FORCE-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
+; FORCE-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 2)
 ; FORCE-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; FORCE-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FORCE:       pred.store.if:
@@ -103,7 +103,7 @@ attributes #0 = { "target-cpu"="knl" }
 ; FORCE-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; FORCE:       pred.store.continue2:
 ; FORCE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; FORCE-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; FORCE-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
 ; FORCE-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
 ; FORCE-NEXT:    br i1 [[TMP15]], label {{%.*}}, label [[VECTOR_BODY]]
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index c301ef3c5319a..b207cca03c90f 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -18,8 +18,8 @@ define void @pr45679(ptr %A) optsize {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 13)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 13)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -53,7 +53,7 @@ define void @pr45679(ptr %A) optsize {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -213,8 +213,8 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 13)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 13)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -252,7 +252,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index 8e132ed8399cd..c76057a18bf3c 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -11,9 +11,9 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -57,7 +57,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -97,10 +97,10 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -166,7 +166,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]])
 ; CHECK-NEXT:    [[TMP48]] = add i32 [[TMP47]], [[TMP45]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -212,9 +212,9 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -261,7 +261,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]])
 ; CHECK-NEXT:    [[TMP29]] = add i32 [[TMP28]], [[TMP26]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -302,10 +302,10 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -371,7 +371,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
 ; CHECK-NEXT:    [[TMP48]] = mul i32 [[TMP47]], [[TMP45]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -417,10 +417,10 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -484,7 +484,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]])
 ; CHECK-NEXT:    [[TMP46]] = add i32 [[TMP45]], [[TMP43]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -530,9 +530,9 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -595,7 +595,7 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = mul i32 [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -638,9 +638,9 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -703,7 +703,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = and i32 [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -746,9 +746,9 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -809,7 +809,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]])
 ; CHECK-NEXT:    [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -852,9 +852,9 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -915,7 +915,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]])
 ; CHECK-NEXT:    [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       middle.block:
@@ -958,9 +958,9 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1021,7 +1021,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1064,9 +1064,9 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1129,7 +1129,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1172,9 +1172,9 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1218,7 +1218,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1259,9 +1259,9 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1305,7 +1305,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1438,9 +1438,9 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
@@ -1488,7 +1488,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1534,9 +1534,9 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1583,7 +1583,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
index f95be1a221e73..3d40707a5e97e 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
@@ -11,9 +11,9 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -56,7 +56,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -203,9 +203,9 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -265,7 +265,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -308,9 +308,9 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -370,7 +370,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> splat (i32 -1)
 ; CHECK-NEXT:    [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -413,9 +413,9 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -475,7 +475,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42]] = or <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -518,9 +518,9 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -580,7 +580,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42]] = xor <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -623,9 +623,9 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -685,7 +685,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = fadd fast <4 x float> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -728,9 +728,9 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -790,7 +790,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = fmul fast <4 x float> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -833,9 +833,9 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -878,7 +878,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
 ; CHECK-NEXT:    [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       middle.block:
@@ -919,9 +919,9 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -964,7 +964,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
 ; CHECK-NEXT:    [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index 56fc8eac35bad..3a54244a41017 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -10,8 +10,8 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 200)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -49,7 +49,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
 ; CHECK:       [[PRED_STORE_CONTINUE6]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204
 ; CHECK-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width-unrolling.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width-unrolling.ll
new file mode 100644
index 0000000000000..ed612c2978f00
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width-unrolling.ll
@@ -0,0 +1,4162 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -force-vector-width=64 -force-vector-interleave=8 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
+
+define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_small_tc_i8(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE1022:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <64 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31, i64 32, i64 33, i64 34, i64 35, i64 36, i64 37, i64 38, i64 39, i64 40, i64 41, i64 42, i64 43, i64 44, i64 45, i64 46, i64 47, i64 48, i64 49, i64 50, i64 51, i64 52, i64 53, i64 54, i64 55, i64 56, i64 57, i64 58, i64 59, i64 60, i64 61, i64 62, i64 63>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE1022]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <64 x i64> [[VEC_IND]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <64 x i64> [[STEP_ADD]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <64 x i64> [[STEP_ADD_2]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_4:%.*]] = add <64 x i64> [[STEP_ADD_3]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_5:%.*]] = add <64 x i64> [[STEP_ADD_4]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_6:%.*]] = add <64 x i64> [[STEP_ADD_5]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_7:%.*]] = add <64 x i64> [[STEP_ADD_6]], splat (i64 64)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <64 x i64> [[VEC_IND]], splat (i64 14)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <64 x i64> [[STEP_ADD]], splat (i64 14)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <64 x i64> [[STEP_ADD_2]], splat (i64 14)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <64 x i64> [[STEP_ADD_3]], splat (i64 14)
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <64 x i64> [[STEP_ADD_4]], splat (i64 14)
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule <64 x i64> [[STEP_ADD_5]], splat (i64 14)
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule <64 x i64> [[STEP_ADD_6]], splat (i64 14)
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ule <64 x i64> [[STEP_ADD_7]], splat (i64 14)
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <64 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP9]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP10]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <64 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP12]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP13]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <64 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP15]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP16]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <64 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP17]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP18]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP19]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <64 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP21]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP22]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <64 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP24:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP24]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP25]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <64 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP26]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP27]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP28]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <64 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP29]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP30:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP30]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP31]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <64 x i1> [[TMP0]], i32 8
+; CHECK-NEXT:    br i1 [[TMP32]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; CHECK:       [[PRED_STORE_IF15]]:
+; CHECK-NEXT:    [[TMP33:%.*]] = add i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP33]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP34]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
+; CHECK:       [[PRED_STORE_CONTINUE16]]:
+; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <64 x i1> [[TMP0]], i32 9
+; CHECK-NEXT:    br i1 [[TMP35]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; CHECK:       [[PRED_STORE_IF17]]:
+; CHECK-NEXT:    [[TMP36:%.*]] = add i64 [[INDEX]], 9
+; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP36]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP37]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
+; CHECK:       [[PRED_STORE_CONTINUE18]]:
+; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <64 x i1> [[TMP0]], i32 10
+; CHECK-NEXT:    br i1 [[TMP38]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK:       [[PRED_STORE_IF19]]:
+; CHECK-NEXT:    [[TMP39:%.*]] = add i64 [[INDEX]], 10
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP39]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP40]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
+; CHECK:       [[PRED_STORE_CONTINUE20]]:
+; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <64 x i1> [[TMP0]], i32 11
+; CHECK-NEXT:    br i1 [[TMP41]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; CHECK:       [[PRED_STORE_IF21]]:
+; CHECK-NEXT:    [[TMP42:%.*]] = add i64 [[INDEX]], 11
+; CHECK-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP42]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP43]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
+; CHECK:       [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <64 x i1> [[TMP0]], i32 12
+; CHECK-NEXT:    br i1 [[TMP44]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; CHECK:       [[PRED_STORE_IF23]]:
+; CHECK-NEXT:    [[TMP45:%.*]] = add i64 [[INDEX]], 12
+; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP45]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP46]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
+; CHECK:       [[PRED_STORE_CONTINUE24]]:
+; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <64 x i1> [[TMP0]], i32 13
+; CHECK-NEXT:    br i1 [[TMP47]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; CHECK:       [[PRED_STORE_IF25]]:
+; CHECK-NEXT:    [[TMP48:%.*]] = add i64 [[INDEX]], 13
+; CHECK-NEXT:    [[TMP49:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP48]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP49]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
+; CHECK:       [[PRED_STORE_CONTINUE26]]:
+; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <64 x i1> [[TMP0]], i32 14
+; CHECK-NEXT:    br i1 [[TMP50]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; CHECK:       [[PRED_STORE_IF27]]:
+; CHECK-NEXT:    [[TMP51:%.*]] = add i64 [[INDEX]], 14
+; CHECK-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP51]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP52]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
+; CHECK:       [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <64 x i1> [[TMP0]], i32 15
+; CHECK-NEXT:    br i1 [[TMP53]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; CHECK:       [[PRED_STORE_IF29]]:
+; CHECK-NEXT:    [[TMP54:%.*]] = add i64 [[INDEX]], 15
+; CHECK-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP54]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP55]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
+; CHECK:       [[PRED_STORE_CONTINUE30]]:
+; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <64 x i1> [[TMP0]], i32 16
+; CHECK-NEXT:    br i1 [[TMP56]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]]
+; CHECK:       [[PRED_STORE_IF31]]:
+; CHECK-NEXT:    [[TMP57:%.*]] = add i64 [[INDEX]], 16
+; CHECK-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP57]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP58]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE32]]
+; CHECK:       [[PRED_STORE_CONTINUE32]]:
+; CHECK-NEXT:    [[TMP59:%.*]] = extractelement <64 x i1> [[TMP0]], i32 17
+; CHECK-NEXT:    br i1 [[TMP59]], label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]]
+; CHECK:       [[PRED_STORE_IF33]]:
+; CHECK-NEXT:    [[TMP60:%.*]] = add i64 [[INDEX]], 17
+; CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP60]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP61]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE34]]
+; CHECK:       [[PRED_STORE_CONTINUE34]]:
+; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <64 x i1> [[TMP0]], i32 18
+; CHECK-NEXT:    br i1 [[TMP62]], label %[[PRED_STORE_IF35:.*]], label %[[PRED_STORE_CONTINUE36:.*]]
+; CHECK:       [[PRED_STORE_IF35]]:
+; CHECK-NEXT:    [[TMP63:%.*]] = add i64 [[INDEX]], 18
+; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP63]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP64]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE36]]
+; CHECK:       [[PRED_STORE_CONTINUE36]]:
+; CHECK-NEXT:    [[TMP65:%.*]] = extractelement <64 x i1> [[TMP0]], i32 19
+; CHECK-NEXT:    br i1 [[TMP65]], label %[[PRED_STORE_IF37:.*]], label %[[PRED_STORE_CONTINUE38:.*]]
+; CHECK:       [[PRED_STORE_IF37]]:
+; CHECK-NEXT:    [[TMP66:%.*]] = add i64 [[INDEX]], 19
+; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP66]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP67]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE38]]
+; CHECK:       [[PRED_STORE_CONTINUE38]]:
+; CHECK-NEXT:    [[TMP68:%.*]] = extractelement <64 x i1> [[TMP0]], i32 20
+; CHECK-NEXT:    br i1 [[TMP68]], label %[[PRED_STORE_IF39:.*]], label %[[PRED_STORE_CONTINUE40:.*]]
+; CHECK:       [[PRED_STORE_IF39]]:
+; CHECK-NEXT:    [[TMP69:%.*]] = add i64 [[INDEX]], 20
+; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP69]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP70]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE40]]
+; CHECK:       [[PRED_STORE_CONTINUE40]]:
+; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <64 x i1> [[TMP0]], i32 21
+; CHECK-NEXT:    br i1 [[TMP71]], label %[[PRED_STORE_IF41:.*]], label %[[PRED_STORE_CONTINUE42:.*]]
+; CHECK:       [[PRED_STORE_IF41]]:
+; CHECK-NEXT:    [[TMP72:%.*]] = add i64 [[INDEX]], 21
+; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP72]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP73]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE42]]
+; CHECK:       [[PRED_STORE_CONTINUE42]]:
+; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <64 x i1> [[TMP0]], i32 22
+; CHECK-NEXT:    br i1 [[TMP74]], label %[[PRED_STORE_IF43:.*]], label %[[PRED_STORE_CONTINUE44:.*]]
+; CHECK:       [[PRED_STORE_IF43]]:
+; CHECK-NEXT:    [[TMP75:%.*]] = add i64 [[INDEX]], 22
+; CHECK-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP75]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP76]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE44]]
+; CHECK:       [[PRED_STORE_CONTINUE44]]:
+; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <64 x i1> [[TMP0]], i32 23
+; CHECK-NEXT:    br i1 [[TMP77]], label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]]
+; CHECK:       [[PRED_STORE_IF45]]:
+; CHECK-NEXT:    [[TMP78:%.*]] = add i64 [[INDEX]], 23
+; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP78]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP79]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE46]]
+; CHECK:       [[PRED_STORE_CONTINUE46]]:
+; CHECK-NEXT:    [[TMP80:%.*]] = extractelement <64 x i1> [[TMP0]], i32 24
+; CHECK-NEXT:    br i1 [[TMP80]], label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]]
+; CHECK:       [[PRED_STORE_IF47]]:
+; CHECK-NEXT:    [[TMP81:%.*]] = add i64 [[INDEX]], 24
+; CHECK-NEXT:    [[TMP82:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP81]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP82]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE48]]
+; CHECK:       [[PRED_STORE_CONTINUE48]]:
+; CHECK-NEXT:    [[TMP83:%.*]] = extractelement <64 x i1> [[TMP0]], i32 25
+; CHECK-NEXT:    br i1 [[TMP83]], label %[[PRED_STORE_IF49:.*]], label %[[PRED_STORE_CONTINUE50:.*]]
+; CHECK:       [[PRED_STORE_IF49]]:
+; CHECK-NEXT:    [[TMP84:%.*]] = add i64 [[INDEX]], 25
+; CHECK-NEXT:    [[TMP85:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP84]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP85]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE50]]
+; CHECK:       [[PRED_STORE_CONTINUE50]]:
+; CHECK-NEXT:    [[TMP86:%.*]] = extractelement <64 x i1> [[TMP0]], i32 26
+; CHECK-NEXT:    br i1 [[TMP86]], label %[[PRED_STORE_IF51:.*]], label %[[PRED_STORE_CONTINUE52:.*]]
+; CHECK:       [[PRED_STORE_IF51]]:
+; CHECK-NEXT:    [[TMP87:%.*]] = add i64 [[INDEX]], 26
+; CHECK-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP87]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP88]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE52]]
+; CHECK:       [[PRED_STORE_CONTINUE52]]:
+; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <64 x i1> [[TMP0]], i32 27
+; CHECK-NEXT:    br i1 [[TMP89]], label %[[PRED_STORE_IF53:.*]], label %[[PRED_STORE_CONTINUE54:.*]]
+; CHECK:       [[PRED_STORE_IF53]]:
+; CHECK-NEXT:    [[TMP90:%.*]] = add i64 [[INDEX]], 27
+; CHECK-NEXT:    [[TMP91:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP90]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP91]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE54]]
+; CHECK:       [[PRED_STORE_CONTINUE54]]:
+; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <64 x i1> [[TMP0]], i32 28
+; CHECK-NEXT:    br i1 [[TMP92]], label %[[PRED_STORE_IF55:.*]], label %[[PRED_STORE_CONTINUE56:.*]]
+; CHECK:       [[PRED_STORE_IF55]]:
+; CHECK-NEXT:    [[TMP93:%.*]] = add i64 [[INDEX]], 28
+; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP93]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP94]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE56]]
+; CHECK:       [[PRED_STORE_CONTINUE56]]:
+; CHECK-NEXT:    [[TMP95:%.*]] = extractelement <64 x i1> [[TMP0]], i32 29
+; CHECK-NEXT:    br i1 [[TMP95]], label %[[PRED_STORE_IF57:.*]], label %[[PRED_STORE_CONTINUE58:.*]]
+; CHECK:       [[PRED_STORE_IF57]]:
+; CHECK-NEXT:    [[TMP96:%.*]] = add i64 [[INDEX]], 29
+; CHECK-NEXT:    [[TMP97:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP96]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP97]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE58]]
+; CHECK:       [[PRED_STORE_CONTINUE58]]:
+; CHECK-NEXT:    [[TMP98:%.*]] = extractelement <64 x i1> [[TMP0]], i32 30
+; CHECK-NEXT:    br i1 [[TMP98]], label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]]
+; CHECK:       [[PRED_STORE_IF59]]:
+; CHECK-NEXT:    [[TMP99:%.*]] = add i64 [[INDEX]], 30
+; CHECK-NEXT:    [[TMP100:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP99]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP100]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE60]]
+; CHECK:       [[PRED_STORE_CONTINUE60]]:
+; CHECK-NEXT:    [[TMP101:%.*]] = extractelement <64 x i1> [[TMP0]], i32 31
+; CHECK-NEXT:    br i1 [[TMP101]], label %[[PRED_STORE_IF61:.*]], label %[[PRED_STORE_CONTINUE62:.*]]
+; CHECK:       [[PRED_STORE_IF61]]:
+; CHECK-NEXT:    [[TMP102:%.*]] = add i64 [[INDEX]], 31
+; CHECK-NEXT:    [[TMP103:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP102]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP103]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE62]]
+; CHECK:       [[PRED_STORE_CONTINUE62]]:
+; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <64 x i1> [[TMP0]], i32 32
+; CHECK-NEXT:    br i1 [[TMP104]], label %[[PRED_STORE_IF63:.*]], label %[[PRED_STORE_CONTINUE64:.*]]
+; CHECK:       [[PRED_STORE_IF63]]:
+; CHECK-NEXT:    [[TMP105:%.*]] = add i64 [[INDEX]], 32
+; CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP105]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP106]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE64]]
+; CHECK:       [[PRED_STORE_CONTINUE64]]:
+; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <64 x i1> [[TMP0]], i32 33
+; CHECK-NEXT:    br i1 [[TMP107]], label %[[PRED_STORE_IF65:.*]], label %[[PRED_STORE_CONTINUE66:.*]]
+; CHECK:       [[PRED_STORE_IF65]]:
+; CHECK-NEXT:    [[TMP108:%.*]] = add i64 [[INDEX]], 33
+; CHECK-NEXT:    [[TMP109:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP108]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP109]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE66]]
+; CHECK:       [[PRED_STORE_CONTINUE66]]:
+; CHECK-NEXT:    [[TMP110:%.*]] = extractelement <64 x i1> [[TMP0]], i32 34
+; CHECK-NEXT:    br i1 [[TMP110]], label %[[PRED_STORE_IF67:.*]], label %[[PRED_STORE_CONTINUE68:.*]]
+; CHECK:       [[PRED_STORE_IF67]]:
+; CHECK-NEXT:    [[TMP111:%.*]] = add i64 [[INDEX]], 34
+; CHECK-NEXT:    [[TMP112:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP111]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP112]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE68]]
+; CHECK:       [[PRED_STORE_CONTINUE68]]:
+; CHECK-NEXT:    [[TMP113:%.*]] = extractelement <64 x i1> [[TMP0]], i32 35
+; CHECK-NEXT:    br i1 [[TMP113]], label %[[PRED_STORE_IF69:.*]], label %[[PRED_STORE_CONTINUE70:.*]]
+; CHECK:       [[PRED_STORE_IF69]]:
+; CHECK-NEXT:    [[TMP114:%.*]] = add i64 [[INDEX]], 35
+; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP114]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP115]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE70]]
+; CHECK:       [[PRED_STORE_CONTINUE70]]:
+; CHECK-NEXT:    [[TMP116:%.*]] = extractelement <64 x i1> [[TMP0]], i32 36
+; CHECK-NEXT:    br i1 [[TMP116]], label %[[PRED_STORE_IF71:.*]], label %[[PRED_STORE_CONTINUE72:.*]]
+; CHECK:       [[PRED_STORE_IF71]]:
+; CHECK-NEXT:    [[TMP117:%.*]] = add i64 [[INDEX]], 36
+; CHECK-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP117]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP118]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE72]]
+; CHECK:       [[PRED_STORE_CONTINUE72]]:
+; CHECK-NEXT:    [[TMP119:%.*]] = extractelement <64 x i1> [[TMP0]], i32 37
+; CHECK-NEXT:    br i1 [[TMP119]], label %[[PRED_STORE_IF73:.*]], label %[[PRED_STORE_CONTINUE74:.*]]
+; CHECK:       [[PRED_STORE_IF73]]:
+; CHECK-NEXT:    [[TMP120:%.*]] = add i64 [[INDEX]], 37
+; CHECK-NEXT:    [[TMP121:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP120]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP121]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE74]]
+; CHECK:       [[PRED_STORE_CONTINUE74]]:
+; CHECK-NEXT:    [[TMP122:%.*]] = extractelement <64 x i1> [[TMP0]], i32 38
+; CHECK-NEXT:    br i1 [[TMP122]], label %[[PRED_STORE_IF75:.*]], label %[[PRED_STORE_CONTINUE76:.*]]
+; CHECK:       [[PRED_STORE_IF75]]:
+; CHECK-NEXT:    [[TMP123:%.*]] = add i64 [[INDEX]], 38
+; CHECK-NEXT:    [[TMP124:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP123]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP124]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE76]]
+; CHECK:       [[PRED_STORE_CONTINUE76]]:
+; CHECK-NEXT:    [[TMP125:%.*]] = extractelement <64 x i1> [[TMP0]], i32 39
+; CHECK-NEXT:    br i1 [[TMP125]], label %[[PRED_STORE_IF77:.*]], label %[[PRED_STORE_CONTINUE78:.*]]
+; CHECK:       [[PRED_STORE_IF77]]:
+; CHECK-NEXT:    [[TMP126:%.*]] = add i64 [[INDEX]], 39
+; CHECK-NEXT:    [[TMP127:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP126]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP127]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE78]]
+; CHECK:       [[PRED_STORE_CONTINUE78]]:
+; CHECK-NEXT:    [[TMP128:%.*]] = extractelement <64 x i1> [[TMP0]], i32 40
+; CHECK-NEXT:    br i1 [[TMP128]], label %[[PRED_STORE_IF79:.*]], label %[[PRED_STORE_CONTINUE80:.*]]
+; CHECK:       [[PRED_STORE_IF79]]:
+; CHECK-NEXT:    [[TMP129:%.*]] = add i64 [[INDEX]], 40
+; CHECK-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP129]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP130]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE80]]
+; CHECK:       [[PRED_STORE_CONTINUE80]]:
+; CHECK-NEXT:    [[TMP131:%.*]] = extractelement <64 x i1> [[TMP0]], i32 41
+; CHECK-NEXT:    br i1 [[TMP131]], label %[[PRED_STORE_IF81:.*]], label %[[PRED_STORE_CONTINUE82:.*]]
+; CHECK:       [[PRED_STORE_IF81]]:
+; CHECK-NEXT:    [[TMP132:%.*]] = add i64 [[INDEX]], 41
+; CHECK-NEXT:    [[TMP133:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP132]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP133]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE82]]
+; CHECK:       [[PRED_STORE_CONTINUE82]]:
+; CHECK-NEXT:    [[TMP134:%.*]] = extractelement <64 x i1> [[TMP0]], i32 42
+; CHECK-NEXT:    br i1 [[TMP134]], label %[[PRED_STORE_IF83:.*]], label %[[PRED_STORE_CONTINUE84:.*]]
+; CHECK:       [[PRED_STORE_IF83]]:
+; CHECK-NEXT:    [[TMP135:%.*]] = add i64 [[INDEX]], 42
+; CHECK-NEXT:    [[TMP136:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP135]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP136]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE84]]
+; CHECK:       [[PRED_STORE_CONTINUE84]]:
+; CHECK-NEXT:    [[TMP137:%.*]] = extractelement <64 x i1> [[TMP0]], i32 43
+; CHECK-NEXT:    br i1 [[TMP137]], label %[[PRED_STORE_IF85:.*]], label %[[PRED_STORE_CONTINUE86:.*]]
+; CHECK:       [[PRED_STORE_IF85]]:
+; CHECK-NEXT:    [[TMP138:%.*]] = add i64 [[INDEX]], 43
+; CHECK-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP138]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP139]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE86]]
+; CHECK:       [[PRED_STORE_CONTINUE86]]:
+; CHECK-NEXT:    [[TMP140:%.*]] = extractelement <64 x i1> [[TMP0]], i32 44
+; CHECK-NEXT:    br i1 [[TMP140]], label %[[PRED_STORE_IF87:.*]], label %[[PRED_STORE_CONTINUE88:.*]]
+; CHECK:       [[PRED_STORE_IF87]]:
+; CHECK-NEXT:    [[TMP141:%.*]] = add i64 [[INDEX]], 44
+; CHECK-NEXT:    [[TMP142:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP141]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP142]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE88]]
+; CHECK:       [[PRED_STORE_CONTINUE88]]:
+; CHECK-NEXT:    [[TMP143:%.*]] = extractelement <64 x i1> [[TMP0]], i32 45
+; CHECK-NEXT:    br i1 [[TMP143]], label %[[PRED_STORE_IF89:.*]], label %[[PRED_STORE_CONTINUE90:.*]]
+; CHECK:       [[PRED_STORE_IF89]]:
+; CHECK-NEXT:    [[TMP144:%.*]] = add i64 [[INDEX]], 45
+; CHECK-NEXT:    [[TMP145:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP144]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP145]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE90]]
+; CHECK:       [[PRED_STORE_CONTINUE90]]:
+; CHECK-NEXT:    [[TMP146:%.*]] = extractelement <64 x i1> [[TMP0]], i32 46
+; CHECK-NEXT:    br i1 [[TMP146]], label %[[PRED_STORE_IF91:.*]], label %[[PRED_STORE_CONTINUE92:.*]]
+; CHECK:       [[PRED_STORE_IF91]]:
+; CHECK-NEXT:    [[TMP147:%.*]] = add i64 [[INDEX]], 46
+; CHECK-NEXT:    [[TMP148:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP147]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP148]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE92]]
+; CHECK:       [[PRED_STORE_CONTINUE92]]:
+; CHECK-NEXT:    [[TMP149:%.*]] = extractelement <64 x i1> [[TMP0]], i32 47
+; CHECK-NEXT:    br i1 [[TMP149]], label %[[PRED_STORE_IF93:.*]], label %[[PRED_STORE_CONTINUE94:.*]]
+; CHECK:       [[PRED_STORE_IF93]]:
+; CHECK-NEXT:    [[TMP150:%.*]] = add i64 [[INDEX]], 47
+; CHECK-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP150]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP151]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE94]]
+; CHECK:       [[PRED_STORE_CONTINUE94]]:
+; CHECK-NEXT:    [[TMP152:%.*]] = extractelement <64 x i1> [[TMP0]], i32 48
+; CHECK-NEXT:    br i1 [[TMP152]], label %[[PRED_STORE_IF95:.*]], label %[[PRED_STORE_CONTINUE96:.*]]
+; CHECK:       [[PRED_STORE_IF95]]:
+; CHECK-NEXT:    [[TMP153:%.*]] = add i64 [[INDEX]], 48
+; CHECK-NEXT:    [[TMP154:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP153]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP154]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE96]]
+; CHECK:       [[PRED_STORE_CONTINUE96]]:
+; CHECK-NEXT:    [[TMP155:%.*]] = extractelement <64 x i1> [[TMP0]], i32 49
+; CHECK-NEXT:    br i1 [[TMP155]], label %[[PRED_STORE_IF97:.*]], label %[[PRED_STORE_CONTINUE98:.*]]
+; CHECK:       [[PRED_STORE_IF97]]:
+; CHECK-NEXT:    [[TMP156:%.*]] = add i64 [[INDEX]], 49
+; CHECK-NEXT:    [[TMP157:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP156]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP157]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE98]]
+; CHECK:       [[PRED_STORE_CONTINUE98]]:
+; CHECK-NEXT:    [[TMP158:%.*]] = extractelement <64 x i1> [[TMP0]], i32 50
+; CHECK-NEXT:    br i1 [[TMP158]], label %[[PRED_STORE_IF99:.*]], label %[[PRED_STORE_CONTINUE100:.*]]
+; CHECK:       [[PRED_STORE_IF99]]:
+; CHECK-NEXT:    [[TMP159:%.*]] = add i64 [[INDEX]], 50
+; CHECK-NEXT:    [[TMP160:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP159]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP160]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE100]]
+; CHECK:       [[PRED_STORE_CONTINUE100]]:
+; CHECK-NEXT:    [[TMP161:%.*]] = extractelement <64 x i1> [[TMP0]], i32 51
+; CHECK-NEXT:    br i1 [[TMP161]], label %[[PRED_STORE_IF101:.*]], label %[[PRED_STORE_CONTINUE102:.*]]
+; CHECK:       [[PRED_STORE_IF101]]:
+; CHECK-NEXT:    [[TMP162:%.*]] = add i64 [[INDEX]], 51
+; CHECK-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP162]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP163]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE102]]
+; CHECK:       [[PRED_STORE_CONTINUE102]]:
+; CHECK-NEXT:    [[TMP164:%.*]] = extractelement <64 x i1> [[TMP0]], i32 52
+; CHECK-NEXT:    br i1 [[TMP164]], label %[[PRED_STORE_IF103:.*]], label %[[PRED_STORE_CONTINUE104:.*]]
+; CHECK:       [[PRED_STORE_IF103]]:
+; CHECK-NEXT:    [[TMP165:%.*]] = add i64 [[INDEX]], 52
+; CHECK-NEXT:    [[TMP166:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP165]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP166]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE104]]
+; CHECK:       [[PRED_STORE_CONTINUE104]]:
+; CHECK-NEXT:    [[TMP167:%.*]] = extractelement <64 x i1> [[TMP0]], i32 53
+; CHECK-NEXT:    br i1 [[TMP167]], label %[[PRED_STORE_IF105:.*]], label %[[PRED_STORE_CONTINUE106:.*]]
+; CHECK:       [[PRED_STORE_IF105]]:
+; CHECK-NEXT:    [[TMP168:%.*]] = add i64 [[INDEX]], 53
+; CHECK-NEXT:    [[TMP169:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP168]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP169]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE106]]
+; CHECK:       [[PRED_STORE_CONTINUE106]]:
+; CHECK-NEXT:    [[TMP170:%.*]] = extractelement <64 x i1> [[TMP0]], i32 54
+; CHECK-NEXT:    br i1 [[TMP170]], label %[[PRED_STORE_IF107:.*]], label %[[PRED_STORE_CONTINUE108:.*]]
+; CHECK:       [[PRED_STORE_IF107]]:
+; CHECK-NEXT:    [[TMP171:%.*]] = add i64 [[INDEX]], 54
+; CHECK-NEXT:    [[TMP172:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP171]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP172]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE108]]
+; CHECK:       [[PRED_STORE_CONTINUE108]]:
+; CHECK-NEXT:    [[TMP173:%.*]] = extractelement <64 x i1> [[TMP0]], i32 55
+; CHECK-NEXT:    br i1 [[TMP173]], label %[[PRED_STORE_IF109:.*]], label %[[PRED_STORE_CONTINUE110:.*]]
+; CHECK:       [[PRED_STORE_IF109]]:
+; CHECK-NEXT:    [[TMP174:%.*]] = add i64 [[INDEX]], 55
+; CHECK-NEXT:    [[TMP175:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP174]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP175]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE110]]
+; CHECK:       [[PRED_STORE_CONTINUE110]]:
+; CHECK-NEXT:    [[TMP176:%.*]] = extractelement <64 x i1> [[TMP0]], i32 56
+; CHECK-NEXT:    br i1 [[TMP176]], label %[[PRED_STORE_IF111:.*]], label %[[PRED_STORE_CONTINUE112:.*]]
+; CHECK:       [[PRED_STORE_IF111]]:
+; CHECK-NEXT:    [[TMP177:%.*]] = add i64 [[INDEX]], 56
+; CHECK-NEXT:    [[TMP178:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP177]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP178]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE112]]
+; CHECK:       [[PRED_STORE_CONTINUE112]]:
+; CHECK-NEXT:    [[TMP179:%.*]] = extractelement <64 x i1> [[TMP0]], i32 57
+; CHECK-NEXT:    br i1 [[TMP179]], label %[[PRED_STORE_IF113:.*]], label %[[PRED_STORE_CONTINUE114:.*]]
+; CHECK:       [[PRED_STORE_IF113]]:
+; CHECK-NEXT:    [[TMP180:%.*]] = add i64 [[INDEX]], 57
+; CHECK-NEXT:    [[TMP181:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP180]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP181]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE114]]
+; CHECK:       [[PRED_STORE_CONTINUE114]]:
+; CHECK-NEXT:    [[TMP182:%.*]] = extractelement <64 x i1> [[TMP0]], i32 58
+; CHECK-NEXT:    br i1 [[TMP182]], label %[[PRED_STORE_IF115:.*]], label %[[PRED_STORE_CONTINUE116:.*]]
+; CHECK:       [[PRED_STORE_IF115]]:
+; CHECK-NEXT:    [[TMP183:%.*]] = add i64 [[INDEX]], 58
+; CHECK-NEXT:    [[TMP184:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP183]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP184]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE116]]
+; CHECK:       [[PRED_STORE_CONTINUE116]]:
+; CHECK-NEXT:    [[TMP185:%.*]] = extractelement <64 x i1> [[TMP0]], i32 59
+; CHECK-NEXT:    br i1 [[TMP185]], label %[[PRED_STORE_IF117:.*]], label %[[PRED_STORE_CONTINUE118:.*]]
+; CHECK:       [[PRED_STORE_IF117]]:
+; CHECK-NEXT:    [[TMP186:%.*]] = add i64 [[INDEX]], 59
+; CHECK-NEXT:    [[TMP187:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP186]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP187]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE118]]
+; CHECK:       [[PRED_STORE_CONTINUE118]]:
+; CHECK-NEXT:    [[TMP188:%.*]] = extractelement <64 x i1> [[TMP0]], i32 60
+; CHECK-NEXT:    br i1 [[TMP188]], label %[[PRED_STORE_IF119:.*]], label %[[PRED_STORE_CONTINUE120:.*]]
+; CHECK:       [[PRED_STORE_IF119]]:
+; CHECK-NEXT:    [[TMP189:%.*]] = add i64 [[INDEX]], 60
+; CHECK-NEXT:    [[TMP190:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP189]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP190]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE120]]
+; CHECK:       [[PRED_STORE_CONTINUE120]]:
+; CHECK-NEXT:    [[TMP191:%.*]] = extractelement <64 x i1> [[TMP0]], i32 61
+; CHECK-NEXT:    br i1 [[TMP191]], label %[[PRED_STORE_IF121:.*]], label %[[PRED_STORE_CONTINUE122:.*]]
+; CHECK:       [[PRED_STORE_IF121]]:
+; CHECK-NEXT:    [[TMP192:%.*]] = add i64 [[INDEX]], 61
+; CHECK-NEXT:    [[TMP193:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP192]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP193]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE122]]
+; CHECK:       [[PRED_STORE_CONTINUE122]]:
+; CHECK-NEXT:    [[TMP194:%.*]] = extractelement <64 x i1> [[TMP0]], i32 62
+; CHECK-NEXT:    br i1 [[TMP194]], label %[[PRED_STORE_IF123:.*]], label %[[PRED_STORE_CONTINUE124:.*]]
+; CHECK:       [[PRED_STORE_IF123]]:
+; CHECK-NEXT:    [[TMP195:%.*]] = add i64 [[INDEX]], 62
+; CHECK-NEXT:    [[TMP196:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP195]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP196]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE124]]
+; CHECK:       [[PRED_STORE_CONTINUE124]]:
+; CHECK-NEXT:    [[TMP197:%.*]] = extractelement <64 x i1> [[TMP0]], i32 63
+; CHECK-NEXT:    br i1 [[TMP197]], label %[[PRED_STORE_IF125:.*]], label %[[PRED_STORE_CONTINUE126:.*]]
+; CHECK:       [[PRED_STORE_IF125]]:
+; CHECK-NEXT:    [[TMP198:%.*]] = add i64 [[INDEX]], 63
+; CHECK-NEXT:    [[TMP199:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP198]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP199]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE126]]
+; CHECK:       [[PRED_STORE_CONTINUE126]]:
+; CHECK-NEXT:    [[TMP200:%.*]] = extractelement <64 x i1> [[TMP1]], i32 0
+; CHECK-NEXT:    br i1 [[TMP200]], label %[[PRED_STORE_IF127:.*]], label %[[PRED_STORE_CONTINUE128:.*]]
+; CHECK:       [[PRED_STORE_IF127]]:
+; CHECK-NEXT:    [[TMP201:%.*]] = add i64 [[INDEX]], 64
+; CHECK-NEXT:    [[TMP202:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP201]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP202]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE128]]
+; CHECK:       [[PRED_STORE_CONTINUE128]]:
+; CHECK-NEXT:    [[TMP203:%.*]] = extractelement <64 x i1> [[TMP1]], i32 1
+; CHECK-NEXT:    br i1 [[TMP203]], label %[[PRED_STORE_IF129:.*]], label %[[PRED_STORE_CONTINUE130:.*]]
+; CHECK:       [[PRED_STORE_IF129]]:
+; CHECK-NEXT:    [[TMP204:%.*]] = add i64 [[INDEX]], 65
+; CHECK-NEXT:    [[TMP205:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP204]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP205]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE130]]
+; CHECK:       [[PRED_STORE_CONTINUE130]]:
+; CHECK-NEXT:    [[TMP206:%.*]] = extractelement <64 x i1> [[TMP1]], i32 2
+; CHECK-NEXT:    br i1 [[TMP206]], label %[[PRED_STORE_IF131:.*]], label %[[PRED_STORE_CONTINUE132:.*]]
+; CHECK:       [[PRED_STORE_IF131]]:
+; CHECK-NEXT:    [[TMP207:%.*]] = add i64 [[INDEX]], 66
+; CHECK-NEXT:    [[TMP208:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP207]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP208]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE132]]
+; CHECK:       [[PRED_STORE_CONTINUE132]]:
+; CHECK-NEXT:    [[TMP209:%.*]] = extractelement <64 x i1> [[TMP1]], i32 3
+; CHECK-NEXT:    br i1 [[TMP209]], label %[[PRED_STORE_IF133:.*]], label %[[PRED_STORE_CONTINUE134:.*]]
+; CHECK:       [[PRED_STORE_IF133]]:
+; CHECK-NEXT:    [[TMP210:%.*]] = add i64 [[INDEX]], 67
+; CHECK-NEXT:    [[TMP211:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP210]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP211]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE134]]
+; CHECK:       [[PRED_STORE_CONTINUE134]]:
+; CHECK-NEXT:    [[TMP212:%.*]] = extractelement <64 x i1> [[TMP1]], i32 4
+; CHECK-NEXT:    br i1 [[TMP212]], label %[[PRED_STORE_IF135:.*]], label %[[PRED_STORE_CONTINUE136:.*]]
+; CHECK:       [[PRED_STORE_IF135]]:
+; CHECK-NEXT:    [[TMP213:%.*]] = add i64 [[INDEX]], 68
+; CHECK-NEXT:    [[TMP214:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP213]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP214]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE136]]
+; CHECK:       [[PRED_STORE_CONTINUE136]]:
+; CHECK-NEXT:    [[TMP215:%.*]] = extractelement <64 x i1> [[TMP1]], i32 5
+; CHECK-NEXT:    br i1 [[TMP215]], label %[[PRED_STORE_IF137:.*]], label %[[PRED_STORE_CONTINUE138:.*]]
+; CHECK:       [[PRED_STORE_IF137]]:
+; CHECK-NEXT:    [[TMP216:%.*]] = add i64 [[INDEX]], 69
+; CHECK-NEXT:    [[TMP217:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP216]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP217]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE138]]
+; CHECK:       [[PRED_STORE_CONTINUE138]]:
+; CHECK-NEXT:    [[TMP218:%.*]] = extractelement <64 x i1> [[TMP1]], i32 6
+; CHECK-NEXT:    br i1 [[TMP218]], label %[[PRED_STORE_IF139:.*]], label %[[PRED_STORE_CONTINUE140:.*]]
+; CHECK:       [[PRED_STORE_IF139]]:
+; CHECK-NEXT:    [[TMP219:%.*]] = add i64 [[INDEX]], 70
+; CHECK-NEXT:    [[TMP220:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP219]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP220]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE140]]
+; CHECK:       [[PRED_STORE_CONTINUE140]]:
+; CHECK-NEXT:    [[TMP221:%.*]] = extractelement <64 x i1> [[TMP1]], i32 7
+; CHECK-NEXT:    br i1 [[TMP221]], label %[[PRED_STORE_IF141:.*]], label %[[PRED_STORE_CONTINUE142:.*]]
+; CHECK:       [[PRED_STORE_IF141]]:
+; CHECK-NEXT:    [[TMP222:%.*]] = add i64 [[INDEX]], 71
+; CHECK-NEXT:    [[TMP223:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP222]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP223]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE142]]
+; CHECK:       [[PRED_STORE_CONTINUE142]]:
+; CHECK-NEXT:    [[TMP224:%.*]] = extractelement <64 x i1> [[TMP1]], i32 8
+; CHECK-NEXT:    br i1 [[TMP224]], label %[[PRED_STORE_IF143:.*]], label %[[PRED_STORE_CONTINUE144:.*]]
+; CHECK:       [[PRED_STORE_IF143]]:
+; CHECK-NEXT:    [[TMP225:%.*]] = add i64 [[INDEX]], 72
+; CHECK-NEXT:    [[TMP226:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP225]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP226]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE144]]
+; CHECK:       [[PRED_STORE_CONTINUE144]]:
+; CHECK-NEXT:    [[TMP227:%.*]] = extractelement <64 x i1> [[TMP1]], i32 9
+; CHECK-NEXT:    br i1 [[TMP227]], label %[[PRED_STORE_IF145:.*]], label %[[PRED_STORE_CONTINUE146:.*]]
+; CHECK:       [[PRED_STORE_IF145]]:
+; CHECK-NEXT:    [[TMP228:%.*]] = add i64 [[INDEX]], 73
+; CHECK-NEXT:    [[TMP229:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP228]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP229]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE146]]
+; CHECK:       [[PRED_STORE_CONTINUE146]]:
+; CHECK-NEXT:    [[TMP230:%.*]] = extractelement <64 x i1> [[TMP1]], i32 10
+; CHECK-NEXT:    br i1 [[TMP230]], label %[[PRED_STORE_IF147:.*]], label %[[PRED_STORE_CONTINUE148:.*]]
+; CHECK:       [[PRED_STORE_IF147]]:
+; CHECK-NEXT:    [[TMP231:%.*]] = add i64 [[INDEX]], 74
+; CHECK-NEXT:    [[TMP232:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP231]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP232]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE148]]
+; CHECK:       [[PRED_STORE_CONTINUE148]]:
+; CHECK-NEXT:    [[TMP233:%.*]] = extractelement <64 x i1> [[TMP1]], i32 11
+; CHECK-NEXT:    br i1 [[TMP233]], label %[[PRED_STORE_IF149:.*]], label %[[PRED_STORE_CONTINUE150:.*]]
+; CHECK:       [[PRED_STORE_IF149]]:
+; CHECK-NEXT:    [[TMP234:%.*]] = add i64 [[INDEX]], 75
+; CHECK-NEXT:    [[TMP235:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP234]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP235]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE150]]
+; CHECK:       [[PRED_STORE_CONTINUE150]]:
+; CHECK-NEXT:    [[TMP236:%.*]] = extractelement <64 x i1> [[TMP1]], i32 12
+; CHECK-NEXT:    br i1 [[TMP236]], label %[[PRED_STORE_IF151:.*]], label %[[PRED_STORE_CONTINUE152:.*]]
+; CHECK:       [[PRED_STORE_IF151]]:
+; CHECK-NEXT:    [[TMP237:%.*]] = add i64 [[INDEX]], 76
+; CHECK-NEXT:    [[TMP238:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP237]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP238]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE152]]
+; CHECK:       [[PRED_STORE_CONTINUE152]]:
+; CHECK-NEXT:    [[TMP239:%.*]] = extractelement <64 x i1> [[TMP1]], i32 13
+; CHECK-NEXT:    br i1 [[TMP239]], label %[[PRED_STORE_IF153:.*]], label %[[PRED_STORE_CONTINUE154:.*]]
+; CHECK:       [[PRED_STORE_IF153]]:
+; CHECK-NEXT:    [[TMP240:%.*]] = add i64 [[INDEX]], 77
+; CHECK-NEXT:    [[TMP241:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP240]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP241]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE154]]
+; CHECK:       [[PRED_STORE_CONTINUE154]]:
+; CHECK-NEXT:    [[TMP242:%.*]] = extractelement <64 x i1> [[TMP1]], i32 14
+; CHECK-NEXT:    br i1 [[TMP242]], label %[[PRED_STORE_IF155:.*]], label %[[PRED_STORE_CONTINUE156:.*]]
+; CHECK:       [[PRED_STORE_IF155]]:
+; CHECK-NEXT:    [[TMP243:%.*]] = add i64 [[INDEX]], 78
+; CHECK-NEXT:    [[TMP244:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP243]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP244]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE156]]
+; CHECK:       [[PRED_STORE_CONTINUE156]]:
+; CHECK-NEXT:    [[TMP245:%.*]] = extractelement <64 x i1> [[TMP1]], i32 15
+; CHECK-NEXT:    br i1 [[TMP245]], label %[[PRED_STORE_IF157:.*]], label %[[PRED_STORE_CONTINUE158:.*]]
+; CHECK:       [[PRED_STORE_IF157]]:
+; CHECK-NEXT:    [[TMP246:%.*]] = add i64 [[INDEX]], 79
+; CHECK-NEXT:    [[TMP247:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP246]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP247]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE158]]
+; CHECK:       [[PRED_STORE_CONTINUE158]]:
+; CHECK-NEXT:    [[TMP248:%.*]] = extractelement <64 x i1> [[TMP1]], i32 16
+; CHECK-NEXT:    br i1 [[TMP248]], label %[[PRED_STORE_IF159:.*]], label %[[PRED_STORE_CONTINUE160:.*]]
+; CHECK:       [[PRED_STORE_IF159]]:
+; CHECK-NEXT:    [[TMP249:%.*]] = add i64 [[INDEX]], 80
+; CHECK-NEXT:    [[TMP250:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP249]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP250]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE160]]
+; CHECK:       [[PRED_STORE_CONTINUE160]]:
+; CHECK-NEXT:    [[TMP251:%.*]] = extractelement <64 x i1> [[TMP1]], i32 17
+; CHECK-NEXT:    br i1 [[TMP251]], label %[[PRED_STORE_IF161:.*]], label %[[PRED_STORE_CONTINUE162:.*]]
+; CHECK:       [[PRED_STORE_IF161]]:
+; CHECK-NEXT:    [[TMP252:%.*]] = add i64 [[INDEX]], 81
+; CHECK-NEXT:    [[TMP253:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP252]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP253]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE162]]
+; CHECK:       [[PRED_STORE_CONTINUE162]]:
+; CHECK-NEXT:    [[TMP254:%.*]] = extractelement <64 x i1> [[TMP1]], i32 18
+; CHECK-NEXT:    br i1 [[TMP254]], label %[[PRED_STORE_IF163:.*]], label %[[PRED_STORE_CONTINUE164:.*]]
+; CHECK:       [[PRED_STORE_IF163]]:
+; CHECK-NEXT:    [[TMP255:%.*]] = add i64 [[INDEX]], 82
+; CHECK-NEXT:    [[TMP256:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP255]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP256]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE164]]
+; CHECK:       [[PRED_STORE_CONTINUE164]]:
+; CHECK-NEXT:    [[TMP257:%.*]] = extractelement <64 x i1> [[TMP1]], i32 19
+; CHECK-NEXT:    br i1 [[TMP257]], label %[[PRED_STORE_IF165:.*]], label %[[PRED_STORE_CONTINUE166:.*]]
+; CHECK:       [[PRED_STORE_IF165]]:
+; CHECK-NEXT:    [[TMP258:%.*]] = add i64 [[INDEX]], 83
+; CHECK-NEXT:    [[TMP259:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP258]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP259]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE166]]
+; CHECK:       [[PRED_STORE_CONTINUE166]]:
+; CHECK-NEXT:    [[TMP260:%.*]] = extractelement <64 x i1> [[TMP1]], i32 20
+; CHECK-NEXT:    br i1 [[TMP260]], label %[[PRED_STORE_IF167:.*]], label %[[PRED_STORE_CONTINUE168:.*]]
+; CHECK:       [[PRED_STORE_IF167]]:
+; CHECK-NEXT:    [[TMP261:%.*]] = add i64 [[INDEX]], 84
+; CHECK-NEXT:    [[TMP262:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP261]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP262]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE168]]
+; CHECK:       [[PRED_STORE_CONTINUE168]]:
+; CHECK-NEXT:    [[TMP263:%.*]] = extractelement <64 x i1> [[TMP1]], i32 21
+; CHECK-NEXT:    br i1 [[TMP263]], label %[[PRED_STORE_IF169:.*]], label %[[PRED_STORE_CONTINUE170:.*]]
+; CHECK:       [[PRED_STORE_IF169]]:
+; CHECK-NEXT:    [[TMP264:%.*]] = add i64 [[INDEX]], 85
+; CHECK-NEXT:    [[TMP265:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP264]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP265]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE170]]
+; CHECK:       [[PRED_STORE_CONTINUE170]]:
+; CHECK-NEXT:    [[TMP266:%.*]] = extractelement <64 x i1> [[TMP1]], i32 22
+; CHECK-NEXT:    br i1 [[TMP266]], label %[[PRED_STORE_IF171:.*]], label %[[PRED_STORE_CONTINUE172:.*]]
+; CHECK:       [[PRED_STORE_IF171]]:
+; CHECK-NEXT:    [[TMP267:%.*]] = add i64 [[INDEX]], 86
+; CHECK-NEXT:    [[TMP268:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP267]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP268]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE172]]
+; CHECK:       [[PRED_STORE_CONTINUE172]]:
+; CHECK-NEXT:    [[TMP269:%.*]] = extractelement <64 x i1> [[TMP1]], i32 23
+; CHECK-NEXT:    br i1 [[TMP269]], label %[[PRED_STORE_IF173:.*]], label %[[PRED_STORE_CONTINUE174:.*]]
+; CHECK:       [[PRED_STORE_IF173]]:
+; CHECK-NEXT:    [[TMP270:%.*]] = add i64 [[INDEX]], 87
+; CHECK-NEXT:    [[TMP271:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP270]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP271]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE174]]
+; CHECK:       [[PRED_STORE_CONTINUE174]]:
+; CHECK-NEXT:    [[TMP272:%.*]] = extractelement <64 x i1> [[TMP1]], i32 24
+; CHECK-NEXT:    br i1 [[TMP272]], label %[[PRED_STORE_IF175:.*]], label %[[PRED_STORE_CONTINUE176:.*]]
+; CHECK:       [[PRED_STORE_IF175]]:
+; CHECK-NEXT:    [[TMP273:%.*]] = add i64 [[INDEX]], 88
+; CHECK-NEXT:    [[TMP274:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP273]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP274]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE176]]
+; CHECK:       [[PRED_STORE_CONTINUE176]]:
+; CHECK-NEXT:    [[TMP275:%.*]] = extractelement <64 x i1> [[TMP1]], i32 25
+; CHECK-NEXT:    br i1 [[TMP275]], label %[[PRED_STORE_IF177:.*]], label %[[PRED_STORE_CONTINUE178:.*]]
+; CHECK:       [[PRED_STORE_IF177]]:
+; CHECK-NEXT:    [[TMP276:%.*]] = add i64 [[INDEX]], 89
+; CHECK-NEXT:    [[TMP277:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP276]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP277]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE178]]
+; CHECK:       [[PRED_STORE_CONTINUE178]]:
+; CHECK-NEXT:    [[TMP278:%.*]] = extractelement <64 x i1> [[TMP1]], i32 26
+; CHECK-NEXT:    br i1 [[TMP278]], label %[[PRED_STORE_IF179:.*]], label %[[PRED_STORE_CONTINUE180:.*]]
+; CHECK:       [[PRED_STORE_IF179]]:
+; CHECK-NEXT:    [[TMP279:%.*]] = add i64 [[INDEX]], 90
+; CHECK-NEXT:    [[TMP280:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP279]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP280]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE180]]
+; CHECK:       [[PRED_STORE_CONTINUE180]]:
+; CHECK-NEXT:    [[TMP281:%.*]] = extractelement <64 x i1> [[TMP1]], i32 27
+; CHECK-NEXT:    br i1 [[TMP281]], label %[[PRED_STORE_IF181:.*]], label %[[PRED_STORE_CONTINUE182:.*]]
+; CHECK:       [[PRED_STORE_IF181]]:
+; CHECK-NEXT:    [[TMP282:%.*]] = add i64 [[INDEX]], 91
+; CHECK-NEXT:    [[TMP283:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP282]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP283]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE182]]
+; CHECK:       [[PRED_STORE_CONTINUE182]]:
+; CHECK-NEXT:    [[TMP284:%.*]] = extractelement <64 x i1> [[TMP1]], i32 28
+; CHECK-NEXT:    br i1 [[TMP284]], label %[[PRED_STORE_IF183:.*]], label %[[PRED_STORE_CONTINUE184:.*]]
+; CHECK:       [[PRED_STORE_IF183]]:
+; CHECK-NEXT:    [[TMP285:%.*]] = add i64 [[INDEX]], 92
+; CHECK-NEXT:    [[TMP286:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP285]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP286]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE184]]
+; CHECK:       [[PRED_STORE_CONTINUE184]]:
+; CHECK-NEXT:    [[TMP287:%.*]] = extractelement <64 x i1> [[TMP1]], i32 29
+; CHECK-NEXT:    br i1 [[TMP287]], label %[[PRED_STORE_IF185:.*]], label %[[PRED_STORE_CONTINUE186:.*]]
+; CHECK:       [[PRED_STORE_IF185]]:
+; CHECK-NEXT:    [[TMP288:%.*]] = add i64 [[INDEX]], 93
+; CHECK-NEXT:    [[TMP289:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP288]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP289]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE186]]
+; CHECK:       [[PRED_STORE_CONTINUE186]]:
+; CHECK-NEXT:    [[TMP290:%.*]] = extractelement <64 x i1> [[TMP1]], i32 30
+; CHECK-NEXT:    br i1 [[TMP290]], label %[[PRED_STORE_IF187:.*]], label %[[PRED_STORE_CONTINUE188:.*]]
+; CHECK:       [[PRED_STORE_IF187]]:
+; CHECK-NEXT:    [[TMP291:%.*]] = add i64 [[INDEX]], 94
+; CHECK-NEXT:    [[TMP292:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP291]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP292]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE188]]
+; CHECK:       [[PRED_STORE_CONTINUE188]]:
+; CHECK-NEXT:    [[TMP293:%.*]] = extractelement <64 x i1> [[TMP1]], i32 31
+; CHECK-NEXT:    br i1 [[TMP293]], label %[[PRED_STORE_IF189:.*]], label %[[PRED_STORE_CONTINUE190:.*]]
+; CHECK:       [[PRED_STORE_IF189]]:
+; CHECK-NEXT:    [[TMP294:%.*]] = add i64 [[INDEX]], 95
+; CHECK-NEXT:    [[TMP295:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP294]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP295]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE190]]
+; CHECK:       [[PRED_STORE_CONTINUE190]]:
+; CHECK-NEXT:    [[TMP296:%.*]] = extractelement <64 x i1> [[TMP1]], i32 32
+; CHECK-NEXT:    br i1 [[TMP296]], label %[[PRED_STORE_IF191:.*]], label %[[PRED_STORE_CONTINUE192:.*]]
+; CHECK:       [[PRED_STORE_IF191]]:
+; CHECK-NEXT:    [[TMP297:%.*]] = add i64 [[INDEX]], 96
+; CHECK-NEXT:    [[TMP298:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP297]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP298]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE192]]
+; CHECK:       [[PRED_STORE_CONTINUE192]]:
+; CHECK-NEXT:    [[TMP299:%.*]] = extractelement <64 x i1> [[TMP1]], i32 33
+; CHECK-NEXT:    br i1 [[TMP299]], label %[[PRED_STORE_IF193:.*]], label %[[PRED_STORE_CONTINUE194:.*]]
+; CHECK:       [[PRED_STORE_IF193]]:
+; CHECK-NEXT:    [[TMP300:%.*]] = add i64 [[INDEX]], 97
+; CHECK-NEXT:    [[TMP301:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP300]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP301]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE194]]
+; CHECK:       [[PRED_STORE_CONTINUE194]]:
+; CHECK-NEXT:    [[TMP302:%.*]] = extractelement <64 x i1> [[TMP1]], i32 34
+; CHECK-NEXT:    br i1 [[TMP302]], label %[[PRED_STORE_IF195:.*]], label %[[PRED_STORE_CONTINUE196:.*]]
+; CHECK:       [[PRED_STORE_IF195]]:
+; CHECK-NEXT:    [[TMP303:%.*]] = add i64 [[INDEX]], 98
+; CHECK-NEXT:    [[TMP304:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP303]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP304]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE196]]
+; CHECK:       [[PRED_STORE_CONTINUE196]]:
+; CHECK-NEXT:    [[TMP305:%.*]] = extractelement <64 x i1> [[TMP1]], i32 35
+; CHECK-NEXT:    br i1 [[TMP305]], label %[[PRED_STORE_IF197:.*]], label %[[PRED_STORE_CONTINUE198:.*]]
+; CHECK:       [[PRED_STORE_IF197]]:
+; CHECK-NEXT:    [[TMP306:%.*]] = add i64 [[INDEX]], 99
+; CHECK-NEXT:    [[TMP307:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP306]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP307]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE198]]
+; CHECK:       [[PRED_STORE_CONTINUE198]]:
+; CHECK-NEXT:    [[TMP308:%.*]] = extractelement <64 x i1> [[TMP1]], i32 36
+; CHECK-NEXT:    br i1 [[TMP308]], label %[[PRED_STORE_IF199:.*]], label %[[PRED_STORE_CONTINUE200:.*]]
+; CHECK:       [[PRED_STORE_IF199]]:
+; CHECK-NEXT:    [[TMP309:%.*]] = add i64 [[INDEX]], 100
+; CHECK-NEXT:    [[TMP310:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP309]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP310]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE200]]
+; CHECK:       [[PRED_STORE_CONTINUE200]]:
+; CHECK-NEXT:    [[TMP311:%.*]] = extractelement <64 x i1> [[TMP1]], i32 37
+; CHECK-NEXT:    br i1 [[TMP311]], label %[[PRED_STORE_IF201:.*]], label %[[PRED_STORE_CONTINUE202:.*]]
+; CHECK:       [[PRED_STORE_IF201]]:
+; CHECK-NEXT:    [[TMP312:%.*]] = add i64 [[INDEX]], 101
+; CHECK-NEXT:    [[TMP313:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP312]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP313]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE202]]
+; CHECK:       [[PRED_STORE_CONTINUE202]]:
+; CHECK-NEXT:    [[TMP314:%.*]] = extractelement <64 x i1> [[TMP1]], i32 38
+; CHECK-NEXT:    br i1 [[TMP314]], label %[[PRED_STORE_IF203:.*]], label %[[PRED_STORE_CONTINUE204:.*]]
+; CHECK:       [[PRED_STORE_IF203]]:
+; CHECK-NEXT:    [[TMP315:%.*]] = add i64 [[INDEX]], 102
+; CHECK-NEXT:    [[TMP316:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP315]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP316]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE204]]
+; CHECK:       [[PRED_STORE_CONTINUE204]]:
+; CHECK-NEXT:    [[TMP317:%.*]] = extractelement <64 x i1> [[TMP1]], i32 39
+; CHECK-NEXT:    br i1 [[TMP317]], label %[[PRED_STORE_IF205:.*]], label %[[PRED_STORE_CONTINUE206:.*]]
+; CHECK:       [[PRED_STORE_IF205]]:
+; CHECK-NEXT:    [[TMP318:%.*]] = add i64 [[INDEX]], 103
+; CHECK-NEXT:    [[TMP319:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP318]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP319]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE206]]
+; CHECK:       [[PRED_STORE_CONTINUE206]]:
+; CHECK-NEXT:    [[TMP320:%.*]] = extractelement <64 x i1> [[TMP1]], i32 40
+; CHECK-NEXT:    br i1 [[TMP320]], label %[[PRED_STORE_IF207:.*]], label %[[PRED_STORE_CONTINUE208:.*]]
+; CHECK:       [[PRED_STORE_IF207]]:
+; CHECK-NEXT:    [[TMP321:%.*]] = add i64 [[INDEX]], 104
+; CHECK-NEXT:    [[TMP322:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP321]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP322]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE208]]
+; CHECK:       [[PRED_STORE_CONTINUE208]]:
+; CHECK-NEXT:    [[TMP323:%.*]] = extractelement <64 x i1> [[TMP1]], i32 41
+; CHECK-NEXT:    br i1 [[TMP323]], label %[[PRED_STORE_IF209:.*]], label %[[PRED_STORE_CONTINUE210:.*]]
+; CHECK:       [[PRED_STORE_IF209]]:
+; CHECK-NEXT:    [[TMP324:%.*]] = add i64 [[INDEX]], 105
+; CHECK-NEXT:    [[TMP325:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP324]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP325]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE210]]
+; CHECK:       [[PRED_STORE_CONTINUE210]]:
+; CHECK-NEXT:    [[TMP326:%.*]] = extractelement <64 x i1> [[TMP1]], i32 42
+; CHECK-NEXT:    br i1 [[TMP326]], label %[[PRED_STORE_IF211:.*]], label %[[PRED_STORE_CONTINUE212:.*]]
+; CHECK:       [[PRED_STORE_IF211]]:
+; CHECK-NEXT:    [[TMP327:%.*]] = add i64 [[INDEX]], 106
+; CHECK-NEXT:    [[TMP328:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP327]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP328]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE212]]
+; CHECK:       [[PRED_STORE_CONTINUE212]]:
+; CHECK-NEXT:    [[TMP329:%.*]] = extractelement <64 x i1> [[TMP1]], i32 43
+; CHECK-NEXT:    br i1 [[TMP329]], label %[[PRED_STORE_IF213:.*]], label %[[PRED_STORE_CONTINUE214:.*]]
+; CHECK:       [[PRED_STORE_IF213]]:
+; CHECK-NEXT:    [[TMP330:%.*]] = add i64 [[INDEX]], 107
+; CHECK-NEXT:    [[TMP331:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP330]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP331]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE214]]
+; CHECK:       [[PRED_STORE_CONTINUE214]]:
+; CHECK-NEXT:    [[TMP332:%.*]] = extractelement <64 x i1> [[TMP1]], i32 44
+; CHECK-NEXT:    br i1 [[TMP332]], label %[[PRED_STORE_IF215:.*]], label %[[PRED_STORE_CONTINUE216:.*]]
+; CHECK:       [[PRED_STORE_IF215]]:
+; CHECK-NEXT:    [[TMP333:%.*]] = add i64 [[INDEX]], 108
+; CHECK-NEXT:    [[TMP334:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP333]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP334]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE216]]
+; CHECK:       [[PRED_STORE_CONTINUE216]]:
+; CHECK-NEXT:    [[TMP335:%.*]] = extractelement <64 x i1> [[TMP1]], i32 45
+; CHECK-NEXT:    br i1 [[TMP335]], label %[[PRED_STORE_IF217:.*]], label %[[PRED_STORE_CONTINUE218:.*]]
+; CHECK:       [[PRED_STORE_IF217]]:
+; CHECK-NEXT:    [[TMP336:%.*]] = add i64 [[INDEX]], 109
+; CHECK-NEXT:    [[TMP337:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP336]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP337]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE218]]
+; CHECK:       [[PRED_STORE_CONTINUE218]]:
+; CHECK-NEXT:    [[TMP338:%.*]] = extractelement <64 x i1> [[TMP1]], i32 46
+; CHECK-NEXT:    br i1 [[TMP338]], label %[[PRED_STORE_IF219:.*]], label %[[PRED_STORE_CONTINUE220:.*]]
+; CHECK:       [[PRED_STORE_IF219]]:
+; CHECK-NEXT:    [[TMP339:%.*]] = add i64 [[INDEX]], 110
+; CHECK-NEXT:    [[TMP340:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP339]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP340]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE220]]
+; CHECK:       [[PRED_STORE_CONTINUE220]]:
+; CHECK-NEXT:    [[TMP341:%.*]] = extractelement <64 x i1> [[TMP1]], i32 47
+; CHECK-NEXT:    br i1 [[TMP341]], label %[[PRED_STORE_IF221:.*]], label %[[PRED_STORE_CONTINUE222:.*]]
+; CHECK:       [[PRED_STORE_IF221]]:
+; CHECK-NEXT:    [[TMP342:%.*]] = add i64 [[INDEX]], 111
+; CHECK-NEXT:    [[TMP343:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP342]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP343]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE222]]
+; CHECK:       [[PRED_STORE_CONTINUE222]]:
+; CHECK-NEXT:    [[TMP344:%.*]] = extractelement <64 x i1> [[TMP1]], i32 48
+; CHECK-NEXT:    br i1 [[TMP344]], label %[[PRED_STORE_IF223:.*]], label %[[PRED_STORE_CONTINUE224:.*]]
+; CHECK:       [[PRED_STORE_IF223]]:
+; CHECK-NEXT:    [[TMP345:%.*]] = add i64 [[INDEX]], 112
+; CHECK-NEXT:    [[TMP346:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP345]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP346]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE224]]
+; CHECK:       [[PRED_STORE_CONTINUE224]]:
+; CHECK-NEXT:    [[TMP347:%.*]] = extractelement <64 x i1> [[TMP1]], i32 49
+; CHECK-NEXT:    br i1 [[TMP347]], label %[[PRED_STORE_IF225:.*]], label %[[PRED_STORE_CONTINUE226:.*]]
+; CHECK:       [[PRED_STORE_IF225]]:
+; CHECK-NEXT:    [[TMP348:%.*]] = add i64 [[INDEX]], 113
+; CHECK-NEXT:    [[TMP349:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP348]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP349]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE226]]
+; CHECK:       [[PRED_STORE_CONTINUE226]]:
+; CHECK-NEXT:    [[TMP350:%.*]] = extractelement <64 x i1> [[TMP1]], i32 50
+; CHECK-NEXT:    br i1 [[TMP350]], label %[[PRED_STORE_IF227:.*]], label %[[PRED_STORE_CONTINUE228:.*]]
+; CHECK:       [[PRED_STORE_IF227]]:
+; CHECK-NEXT:    [[TMP351:%.*]] = add i64 [[INDEX]], 114
+; CHECK-NEXT:    [[TMP352:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP351]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP352]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE228]]
+; CHECK:       [[PRED_STORE_CONTINUE228]]:
+; CHECK-NEXT:    [[TMP353:%.*]] = extractelement <64 x i1> [[TMP1]], i32 51
+; CHECK-NEXT:    br i1 [[TMP353]], label %[[PRED_STORE_IF229:.*]], label %[[PRED_STORE_CONTINUE230:.*]]
+; CHECK:       [[PRED_STORE_IF229]]:
+; CHECK-NEXT:    [[TMP354:%.*]] = add i64 [[INDEX]], 115
+; CHECK-NEXT:    [[TMP355:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP354]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP355]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE230]]
+; CHECK:       [[PRED_STORE_CONTINUE230]]:
+; CHECK-NEXT:    [[TMP356:%.*]] = extractelement <64 x i1> [[TMP1]], i32 52
+; CHECK-NEXT:    br i1 [[TMP356]], label %[[PRED_STORE_IF231:.*]], label %[[PRED_STORE_CONTINUE232:.*]]
+; CHECK:       [[PRED_STORE_IF231]]:
+; CHECK-NEXT:    [[TMP357:%.*]] = add i64 [[INDEX]], 116
+; CHECK-NEXT:    [[TMP358:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP357]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP358]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE232]]
+; CHECK:       [[PRED_STORE_CONTINUE232]]:
+; CHECK-NEXT:    [[TMP359:%.*]] = extractelement <64 x i1> [[TMP1]], i32 53
+; CHECK-NEXT:    br i1 [[TMP359]], label %[[PRED_STORE_IF233:.*]], label %[[PRED_STORE_CONTINUE234:.*]]
+; CHECK:       [[PRED_STORE_IF233]]:
+; CHECK-NEXT:    [[TMP360:%.*]] = add i64 [[INDEX]], 117
+; CHECK-NEXT:    [[TMP361:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP360]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP361]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE234]]
+; CHECK:       [[PRED_STORE_CONTINUE234]]:
+; CHECK-NEXT:    [[TMP362:%.*]] = extractelement <64 x i1> [[TMP1]], i32 54
+; CHECK-NEXT:    br i1 [[TMP362]], label %[[PRED_STORE_IF235:.*]], label %[[PRED_STORE_CONTINUE236:.*]]
+; CHECK:       [[PRED_STORE_IF235]]:
+; CHECK-NEXT:    [[TMP363:%.*]] = add i64 [[INDEX]], 118
+; CHECK-NEXT:    [[TMP364:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP363]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP364]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE236]]
+; CHECK:       [[PRED_STORE_CONTINUE236]]:
+; CHECK-NEXT:    [[TMP365:%.*]] = extractelement <64 x i1> [[TMP1]], i32 55
+; CHECK-NEXT:    br i1 [[TMP365]], label %[[PRED_STORE_IF237:.*]], label %[[PRED_STORE_CONTINUE238:.*]]
+; CHECK:       [[PRED_STORE_IF237]]:
+; CHECK-NEXT:    [[TMP366:%.*]] = add i64 [[INDEX]], 119
+; CHECK-NEXT:    [[TMP367:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP366]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP367]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE238]]
+; CHECK:       [[PRED_STORE_CONTINUE238]]:
+; CHECK-NEXT:    [[TMP368:%.*]] = extractelement <64 x i1> [[TMP1]], i32 56
+; CHECK-NEXT:    br i1 [[TMP368]], label %[[PRED_STORE_IF239:.*]], label %[[PRED_STORE_CONTINUE240:.*]]
+; CHECK:       [[PRED_STORE_IF239]]:
+; CHECK-NEXT:    [[TMP369:%.*]] = add i64 [[INDEX]], 120
+; CHECK-NEXT:    [[TMP370:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP369]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP370]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE240]]
+; CHECK:       [[PRED_STORE_CONTINUE240]]:
+; CHECK-NEXT:    [[TMP371:%.*]] = extractelement <64 x i1> [[TMP1]], i32 57
+; CHECK-NEXT:    br i1 [[TMP371]], label %[[PRED_STORE_IF241:.*]], label %[[PRED_STORE_CONTINUE242:.*]]
+; CHECK:       [[PRED_STORE_IF241]]:
+; CHECK-NEXT:    [[TMP372:%.*]] = add i64 [[INDEX]], 121
+; CHECK-NEXT:    [[TMP373:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP372]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP373]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE242]]
+; CHECK:       [[PRED_STORE_CONTINUE242]]:
+; CHECK-NEXT:    [[TMP374:%.*]] = extractelement <64 x i1> [[TMP1]], i32 58
+; CHECK-NEXT:    br i1 [[TMP374]], label %[[PRED_STORE_IF243:.*]], label %[[PRED_STORE_CONTINUE244:.*]]
+; CHECK:       [[PRED_STORE_IF243]]:
+; CHECK-NEXT:    [[TMP375:%.*]] = add i64 [[INDEX]], 122
+; CHECK-NEXT:    [[TMP376:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP375]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP376]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE244]]
+; CHECK:       [[PRED_STORE_CONTINUE244]]:
+; CHECK-NEXT:    [[TMP377:%.*]] = extractelement <64 x i1> [[TMP1]], i32 59
+; CHECK-NEXT:    br i1 [[TMP377]], label %[[PRED_STORE_IF245:.*]], label %[[PRED_STORE_CONTINUE246:.*]]
+; CHECK:       [[PRED_STORE_IF245]]:
+; CHECK-NEXT:    [[TMP378:%.*]] = add i64 [[INDEX]], 123
+; CHECK-NEXT:    [[TMP379:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP378]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP379]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE246]]
+; CHECK:       [[PRED_STORE_CONTINUE246]]:
+; CHECK-NEXT:    [[TMP380:%.*]] = extractelement <64 x i1> [[TMP1]], i32 60
+; CHECK-NEXT:    br i1 [[TMP380]], label %[[PRED_STORE_IF247:.*]], label %[[PRED_STORE_CONTINUE248:.*]]
+; CHECK:       [[PRED_STORE_IF247]]:
+; CHECK-NEXT:    [[TMP381:%.*]] = add i64 [[INDEX]], 124
+; CHECK-NEXT:    [[TMP382:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP381]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP382]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE248]]
+; CHECK:       [[PRED_STORE_CONTINUE248]]:
+; CHECK-NEXT:    [[TMP383:%.*]] = extractelement <64 x i1> [[TMP1]], i32 61
+; CHECK-NEXT:    br i1 [[TMP383]], label %[[PRED_STORE_IF249:.*]], label %[[PRED_STORE_CONTINUE250:.*]]
+; CHECK:       [[PRED_STORE_IF249]]:
+; CHECK-NEXT:    [[TMP384:%.*]] = add i64 [[INDEX]], 125
+; CHECK-NEXT:    [[TMP385:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP384]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP385]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE250]]
+; CHECK:       [[PRED_STORE_CONTINUE250]]:
+; CHECK-NEXT:    [[TMP386:%.*]] = extractelement <64 x i1> [[TMP1]], i32 62
+; CHECK-NEXT:    br i1 [[TMP386]], label %[[PRED_STORE_IF251:.*]], label %[[PRED_STORE_CONTINUE252:.*]]
+; CHECK:       [[PRED_STORE_IF251]]:
+; CHECK-NEXT:    [[TMP387:%.*]] = add i64 [[INDEX]], 126
+; CHECK-NEXT:    [[TMP388:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP387]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP388]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE252]]
+; CHECK:       [[PRED_STORE_CONTINUE252]]:
+; CHECK-NEXT:    [[TMP389:%.*]] = extractelement <64 x i1> [[TMP1]], i32 63
+; CHECK-NEXT:    br i1 [[TMP389]], label %[[PRED_STORE_IF253:.*]], label %[[PRED_STORE_CONTINUE254:.*]]
+; CHECK:       [[PRED_STORE_IF253]]:
+; CHECK-NEXT:    [[TMP390:%.*]] = add i64 [[INDEX]], 127
+; CHECK-NEXT:    [[TMP391:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP390]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP391]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE254]]
+; CHECK:       [[PRED_STORE_CONTINUE254]]:
+; CHECK-NEXT:    [[TMP392:%.*]] = extractelement <64 x i1> [[TMP2]], i32 0
+; CHECK-NEXT:    br i1 [[TMP392]], label %[[PRED_STORE_IF255:.*]], label %[[PRED_STORE_CONTINUE256:.*]]
+; CHECK:       [[PRED_STORE_IF255]]:
+; CHECK-NEXT:    [[TMP393:%.*]] = add i64 [[INDEX]], 128
+; CHECK-NEXT:    [[TMP394:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP393]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP394]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE256]]
+; CHECK:       [[PRED_STORE_CONTINUE256]]:
+; CHECK-NEXT:    [[TMP395:%.*]] = extractelement <64 x i1> [[TMP2]], i32 1
+; CHECK-NEXT:    br i1 [[TMP395]], label %[[PRED_STORE_IF257:.*]], label %[[PRED_STORE_CONTINUE258:.*]]
+; CHECK:       [[PRED_STORE_IF257]]:
+; CHECK-NEXT:    [[TMP396:%.*]] = add i64 [[INDEX]], 129
+; CHECK-NEXT:    [[TMP397:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP396]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP397]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE258]]
+; CHECK:       [[PRED_STORE_CONTINUE258]]:
+; CHECK-NEXT:    [[TMP398:%.*]] = extractelement <64 x i1> [[TMP2]], i32 2
+; CHECK-NEXT:    br i1 [[TMP398]], label %[[PRED_STORE_IF259:.*]], label %[[PRED_STORE_CONTINUE260:.*]]
+; CHECK:       [[PRED_STORE_IF259]]:
+; CHECK-NEXT:    [[TMP399:%.*]] = add i64 [[INDEX]], 130
+; CHECK-NEXT:    [[TMP400:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP399]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP400]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE260]]
+; CHECK:       [[PRED_STORE_CONTINUE260]]:
+; CHECK-NEXT:    [[TMP401:%.*]] = extractelement <64 x i1> [[TMP2]], i32 3
+; CHECK-NEXT:    br i1 [[TMP401]], label %[[PRED_STORE_IF261:.*]], label %[[PRED_STORE_CONTINUE262:.*]]
+; CHECK:       [[PRED_STORE_IF261]]:
+; CHECK-NEXT:    [[TMP402:%.*]] = add i64 [[INDEX]], 131
+; CHECK-NEXT:    [[TMP403:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP402]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP403]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE262]]
+; CHECK:       [[PRED_STORE_CONTINUE262]]:
+; CHECK-NEXT:    [[TMP404:%.*]] = extractelement <64 x i1> [[TMP2]], i32 4
+; CHECK-NEXT:    br i1 [[TMP404]], label %[[PRED_STORE_IF263:.*]], label %[[PRED_STORE_CONTINUE264:.*]]
+; CHECK:       [[PRED_STORE_IF263]]:
+; CHECK-NEXT:    [[TMP405:%.*]] = add i64 [[INDEX]], 132
+; CHECK-NEXT:    [[TMP406:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP405]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP406]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE264]]
+; CHECK:       [[PRED_STORE_CONTINUE264]]:
+; CHECK-NEXT:    [[TMP407:%.*]] = extractelement <64 x i1> [[TMP2]], i32 5
+; CHECK-NEXT:    br i1 [[TMP407]], label %[[PRED_STORE_IF265:.*]], label %[[PRED_STORE_CONTINUE266:.*]]
+; CHECK:       [[PRED_STORE_IF265]]:
+; CHECK-NEXT:    [[TMP408:%.*]] = add i64 [[INDEX]], 133
+; CHECK-NEXT:    [[TMP409:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP408]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP409]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE266]]
+; CHECK:       [[PRED_STORE_CONTINUE266]]:
+; CHECK-NEXT:    [[TMP410:%.*]] = extractelement <64 x i1> [[TMP2]], i32 6
+; CHECK-NEXT:    br i1 [[TMP410]], label %[[PRED_STORE_IF267:.*]], label %[[PRED_STORE_CONTINUE268:.*]]
+; CHECK:       [[PRED_STORE_IF267]]:
+; CHECK-NEXT:    [[TMP411:%.*]] = add i64 [[INDEX]], 134
+; CHECK-NEXT:    [[TMP412:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP411]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP412]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE268]]
+; CHECK:       [[PRED_STORE_CONTINUE268]]:
+; CHECK-NEXT:    [[TMP413:%.*]] = extractelement <64 x i1> [[TMP2]], i32 7
+; CHECK-NEXT:    br i1 [[TMP413]], label %[[PRED_STORE_IF269:.*]], label %[[PRED_STORE_CONTINUE270:.*]]
+; CHECK:       [[PRED_STORE_IF269]]:
+; CHECK-NEXT:    [[TMP414:%.*]] = add i64 [[INDEX]], 135
+; CHECK-NEXT:    [[TMP415:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP414]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP415]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE270]]
+; CHECK:       [[PRED_STORE_CONTINUE270]]:
+; CHECK-NEXT:    [[TMP416:%.*]] = extractelement <64 x i1> [[TMP2]], i32 8
+; CHECK-NEXT:    br i1 [[TMP416]], label %[[PRED_STORE_IF271:.*]], label %[[PRED_STORE_CONTINUE272:.*]]
+; CHECK:       [[PRED_STORE_IF271]]:
+; CHECK-NEXT:    [[TMP417:%.*]] = add i64 [[INDEX]], 136
+; CHECK-NEXT:    [[TMP418:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP417]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP418]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE272]]
+; CHECK:       [[PRED_STORE_CONTINUE272]]:
+; CHECK-NEXT:    [[TMP419:%.*]] = extractelement <64 x i1> [[TMP2]], i32 9
+; CHECK-NEXT:    br i1 [[TMP419]], label %[[PRED_STORE_IF273:.*]], label %[[PRED_STORE_CONTINUE274:.*]]
+; CHECK:       [[PRED_STORE_IF273]]:
+; CHECK-NEXT:    [[TMP420:%.*]] = add i64 [[INDEX]], 137
+; CHECK-NEXT:    [[TMP421:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP420]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP421]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE274]]
+; CHECK:       [[PRED_STORE_CONTINUE274]]:
+; CHECK-NEXT:    [[TMP422:%.*]] = extractelement <64 x i1> [[TMP2]], i32 10
+; CHECK-NEXT:    br i1 [[TMP422]], label %[[PRED_STORE_IF275:.*]], label %[[PRED_STORE_CONTINUE276:.*]]
+; CHECK:       [[PRED_STORE_IF275]]:
+; CHECK-NEXT:    [[TMP423:%.*]] = add i64 [[INDEX]], 138
+; CHECK-NEXT:    [[TMP424:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP423]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP424]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE276]]
+; CHECK:       [[PRED_STORE_CONTINUE276]]:
+; CHECK-NEXT:    [[TMP425:%.*]] = extractelement <64 x i1> [[TMP2]], i32 11
+; CHECK-NEXT:    br i1 [[TMP425]], label %[[PRED_STORE_IF277:.*]], label %[[PRED_STORE_CONTINUE278:.*]]
+; CHECK:       [[PRED_STORE_IF277]]:
+; CHECK-NEXT:    [[TMP426:%.*]] = add i64 [[INDEX]], 139
+; CHECK-NEXT:    [[TMP427:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP426]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP427]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE278]]
+; CHECK:       [[PRED_STORE_CONTINUE278]]:
+; CHECK-NEXT:    [[TMP428:%.*]] = extractelement <64 x i1> [[TMP2]], i32 12
+; CHECK-NEXT:    br i1 [[TMP428]], label %[[PRED_STORE_IF279:.*]], label %[[PRED_STORE_CONTINUE280:.*]]
+; CHECK:       [[PRED_STORE_IF279]]:
+; CHECK-NEXT:    [[TMP429:%.*]] = add i64 [[INDEX]], 140
+; CHECK-NEXT:    [[TMP430:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP429]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP430]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE280]]
+; CHECK:       [[PRED_STORE_CONTINUE280]]:
+; CHECK-NEXT:    [[TMP431:%.*]] = extractelement <64 x i1> [[TMP2]], i32 13
+; CHECK-NEXT:    br i1 [[TMP431]], label %[[PRED_STORE_IF281:.*]], label %[[PRED_STORE_CONTINUE282:.*]]
+; CHECK:       [[PRED_STORE_IF281]]:
+; CHECK-NEXT:    [[TMP432:%.*]] = add i64 [[INDEX]], 141
+; CHECK-NEXT:    [[TMP433:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP432]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP433]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE282]]
+; CHECK:       [[PRED_STORE_CONTINUE282]]:
+; CHECK-NEXT:    [[TMP434:%.*]] = extractelement <64 x i1> [[TMP2]], i32 14
+; CHECK-NEXT:    br i1 [[TMP434]], label %[[PRED_STORE_IF283:.*]], label %[[PRED_STORE_CONTINUE284:.*]]
+; CHECK:       [[PRED_STORE_IF283]]:
+; CHECK-NEXT:    [[TMP435:%.*]] = add i64 [[INDEX]], 142
+; CHECK-NEXT:    [[TMP436:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP435]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP436]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE284]]
+; CHECK:       [[PRED_STORE_CONTINUE284]]:
+; CHECK-NEXT:    [[TMP437:%.*]] = extractelement <64 x i1> [[TMP2]], i32 15
+; CHECK-NEXT:    br i1 [[TMP437]], label %[[PRED_STORE_IF285:.*]], label %[[PRED_STORE_CONTINUE286:.*]]
+; CHECK:       [[PRED_STORE_IF285]]:
+; CHECK-NEXT:    [[TMP438:%.*]] = add i64 [[INDEX]], 143
+; CHECK-NEXT:    [[TMP439:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP438]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP439]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE286]]
+; CHECK:       [[PRED_STORE_CONTINUE286]]:
+; CHECK-NEXT:    [[TMP440:%.*]] = extractelement <64 x i1> [[TMP2]], i32 16
+; CHECK-NEXT:    br i1 [[TMP440]], label %[[PRED_STORE_IF287:.*]], label %[[PRED_STORE_CONTINUE288:.*]]
+; CHECK:       [[PRED_STORE_IF287]]:
+; CHECK-NEXT:    [[TMP441:%.*]] = add i64 [[INDEX]], 144
+; CHECK-NEXT:    [[TMP442:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP441]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP442]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE288]]
+; CHECK:       [[PRED_STORE_CONTINUE288]]:
+; CHECK-NEXT:    [[TMP443:%.*]] = extractelement <64 x i1> [[TMP2]], i32 17
+; CHECK-NEXT:    br i1 [[TMP443]], label %[[PRED_STORE_IF289:.*]], label %[[PRED_STORE_CONTINUE290:.*]]
+; CHECK:       [[PRED_STORE_IF289]]:
+; CHECK-NEXT:    [[TMP444:%.*]] = add i64 [[INDEX]], 145
+; CHECK-NEXT:    [[TMP445:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP444]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP445]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE290]]
+; CHECK:       [[PRED_STORE_CONTINUE290]]:
+; CHECK-NEXT:    [[TMP446:%.*]] = extractelement <64 x i1> [[TMP2]], i32 18
+; CHECK-NEXT:    br i1 [[TMP446]], label %[[PRED_STORE_IF291:.*]], label %[[PRED_STORE_CONTINUE292:.*]]
+; CHECK:       [[PRED_STORE_IF291]]:
+; CHECK-NEXT:    [[TMP447:%.*]] = add i64 [[INDEX]], 146
+; CHECK-NEXT:    [[TMP448:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP447]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP448]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE292]]
+; CHECK:       [[PRED_STORE_CONTINUE292]]:
+; CHECK-NEXT:    [[TMP449:%.*]] = extractelement <64 x i1> [[TMP2]], i32 19
+; CHECK-NEXT:    br i1 [[TMP449]], label %[[PRED_STORE_IF293:.*]], label %[[PRED_STORE_CONTINUE294:.*]]
+; CHECK:       [[PRED_STORE_IF293]]:
+; CHECK-NEXT:    [[TMP450:%.*]] = add i64 [[INDEX]], 147
+; CHECK-NEXT:    [[TMP451:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP450]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP451]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE294]]
+; CHECK:       [[PRED_STORE_CONTINUE294]]:
+; CHECK-NEXT:    [[TMP452:%.*]] = extractelement <64 x i1> [[TMP2]], i32 20
+; CHECK-NEXT:    br i1 [[TMP452]], label %[[PRED_STORE_IF295:.*]], label %[[PRED_STORE_CONTINUE296:.*]]
+; CHECK:       [[PRED_STORE_IF295]]:
+; CHECK-NEXT:    [[TMP453:%.*]] = add i64 [[INDEX]], 148
+; CHECK-NEXT:    [[TMP454:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP453]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP454]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE296]]
+; CHECK:       [[PRED_STORE_CONTINUE296]]:
+; CHECK-NEXT:    [[TMP455:%.*]] = extractelement <64 x i1> [[TMP2]], i32 21
+; CHECK-NEXT:    br i1 [[TMP455]], label %[[PRED_STORE_IF297:.*]], label %[[PRED_STORE_CONTINUE298:.*]]
+; CHECK:       [[PRED_STORE_IF297]]:
+; CHECK-NEXT:    [[TMP456:%.*]] = add i64 [[INDEX]], 149
+; CHECK-NEXT:    [[TMP457:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP456]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP457]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE298]]
+; CHECK:       [[PRED_STORE_CONTINUE298]]:
+; CHECK-NEXT:    [[TMP458:%.*]] = extractelement <64 x i1> [[TMP2]], i32 22
+; CHECK-NEXT:    br i1 [[TMP458]], label %[[PRED_STORE_IF299:.*]], label %[[PRED_STORE_CONTINUE300:.*]]
+; CHECK:       [[PRED_STORE_IF299]]:
+; CHECK-NEXT:    [[TMP459:%.*]] = add i64 [[INDEX]], 150
+; CHECK-NEXT:    [[TMP460:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP459]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP460]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE300]]
+; CHECK:       [[PRED_STORE_CONTINUE300]]:
+; CHECK-NEXT:    [[TMP461:%.*]] = extractelement <64 x i1> [[TMP2]], i32 23
+; CHECK-NEXT:    br i1 [[TMP461]], label %[[PRED_STORE_IF301:.*]], label %[[PRED_STORE_CONTINUE302:.*]]
+; CHECK:       [[PRED_STORE_IF301]]:
+; CHECK-NEXT:    [[TMP462:%.*]] = add i64 [[INDEX]], 151
+; CHECK-NEXT:    [[TMP463:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP462]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP463]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE302]]
+; CHECK:       [[PRED_STORE_CONTINUE302]]:
+; CHECK-NEXT:    [[TMP464:%.*]] = extractelement <64 x i1> [[TMP2]], i32 24
+; CHECK-NEXT:    br i1 [[TMP464]], label %[[PRED_STORE_IF303:.*]], label %[[PRED_STORE_CONTINUE304:.*]]
+; CHECK:       [[PRED_STORE_IF303]]:
+; CHECK-NEXT:    [[TMP465:%.*]] = add i64 [[INDEX]], 152
+; CHECK-NEXT:    [[TMP466:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP465]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP466]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE304]]
+; CHECK:       [[PRED_STORE_CONTINUE304]]:
+; CHECK-NEXT:    [[TMP467:%.*]] = extractelement <64 x i1> [[TMP2]], i32 25
+; CHECK-NEXT:    br i1 [[TMP467]], label %[[PRED_STORE_IF305:.*]], label %[[PRED_STORE_CONTINUE306:.*]]
+; CHECK:       [[PRED_STORE_IF305]]:
+; CHECK-NEXT:    [[TMP468:%.*]] = add i64 [[INDEX]], 153
+; CHECK-NEXT:    [[TMP469:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP468]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP469]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE306]]
+; CHECK:       [[PRED_STORE_CONTINUE306]]:
+; CHECK-NEXT:    [[TMP470:%.*]] = extractelement <64 x i1> [[TMP2]], i32 26
+; CHECK-NEXT:    br i1 [[TMP470]], label %[[PRED_STORE_IF307:.*]], label %[[PRED_STORE_CONTINUE308:.*]]
+; CHECK:       [[PRED_STORE_IF307]]:
+; CHECK-NEXT:    [[TMP471:%.*]] = add i64 [[INDEX]], 154
+; CHECK-NEXT:    [[TMP472:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP471]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP472]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE308]]
+; CHECK:       [[PRED_STORE_CONTINUE308]]:
+; CHECK-NEXT:    [[TMP473:%.*]] = extractelement <64 x i1> [[TMP2]], i32 27
+; CHECK-NEXT:    br i1 [[TMP473]], label %[[PRED_STORE_IF309:.*]], label %[[PRED_STORE_CONTINUE310:.*]]
+; CHECK:       [[PRED_STORE_IF309]]:
+; CHECK-NEXT:    [[TMP474:%.*]] = add i64 [[INDEX]], 155
+; CHECK-NEXT:    [[TMP475:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP474]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP475]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE310]]
+; CHECK:       [[PRED_STORE_CONTINUE310]]:
+; CHECK-NEXT:    [[TMP476:%.*]] = extractelement <64 x i1> [[TMP2]], i32 28
+; CHECK-NEXT:    br i1 [[TMP476]], label %[[PRED_STORE_IF311:.*]], label %[[PRED_STORE_CONTINUE312:.*]]
+; CHECK:       [[PRED_STORE_IF311]]:
+; CHECK-NEXT:    [[TMP477:%.*]] = add i64 [[INDEX]], 156
+; CHECK-NEXT:    [[TMP478:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP477]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP478]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE312]]
+; CHECK:       [[PRED_STORE_CONTINUE312]]:
+; CHECK-NEXT:    [[TMP479:%.*]] = extractelement <64 x i1> [[TMP2]], i32 29
+; CHECK-NEXT:    br i1 [[TMP479]], label %[[PRED_STORE_IF313:.*]], label %[[PRED_STORE_CONTINUE314:.*]]
+; CHECK:       [[PRED_STORE_IF313]]:
+; CHECK-NEXT:    [[TMP480:%.*]] = add i64 [[INDEX]], 157
+; CHECK-NEXT:    [[TMP481:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP480]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP481]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE314]]
+; CHECK:       [[PRED_STORE_CONTINUE314]]:
+; CHECK-NEXT:    [[TMP482:%.*]] = extractelement <64 x i1> [[TMP2]], i32 30
+; CHECK-NEXT:    br i1 [[TMP482]], label %[[PRED_STORE_IF315:.*]], label %[[PRED_STORE_CONTINUE316:.*]]
+; CHECK:       [[PRED_STORE_IF315]]:
+; CHECK-NEXT:    [[TMP483:%.*]] = add i64 [[INDEX]], 158
+; CHECK-NEXT:    [[TMP484:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP483]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP484]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE316]]
+; CHECK:       [[PRED_STORE_CONTINUE316]]:
+; CHECK-NEXT:    [[TMP485:%.*]] = extractelement <64 x i1> [[TMP2]], i32 31
+; CHECK-NEXT:    br i1 [[TMP485]], label %[[PRED_STORE_IF317:.*]], label %[[PRED_STORE_CONTINUE318:.*]]
+; CHECK:       [[PRED_STORE_IF317]]:
+; CHECK-NEXT:    [[TMP486:%.*]] = add i64 [[INDEX]], 159
+; CHECK-NEXT:    [[TMP487:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP486]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP487]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE318]]
+; CHECK:       [[PRED_STORE_CONTINUE318]]:
+; CHECK-NEXT:    [[TMP488:%.*]] = extractelement <64 x i1> [[TMP2]], i32 32
+; CHECK-NEXT:    br i1 [[TMP488]], label %[[PRED_STORE_IF319:.*]], label %[[PRED_STORE_CONTINUE320:.*]]
+; CHECK:       [[PRED_STORE_IF319]]:
+; CHECK-NEXT:    [[TMP489:%.*]] = add i64 [[INDEX]], 160
+; CHECK-NEXT:    [[TMP490:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP489]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP490]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE320]]
+; CHECK:       [[PRED_STORE_CONTINUE320]]:
+; CHECK-NEXT:    [[TMP491:%.*]] = extractelement <64 x i1> [[TMP2]], i32 33
+; CHECK-NEXT:    br i1 [[TMP491]], label %[[PRED_STORE_IF321:.*]], label %[[PRED_STORE_CONTINUE322:.*]]
+; CHECK:       [[PRED_STORE_IF321]]:
+; CHECK-NEXT:    [[TMP492:%.*]] = add i64 [[INDEX]], 161
+; CHECK-NEXT:    [[TMP493:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP492]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP493]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE322]]
+; CHECK:       [[PRED_STORE_CONTINUE322]]:
+; CHECK-NEXT:    [[TMP494:%.*]] = extractelement <64 x i1> [[TMP2]], i32 34
+; CHECK-NEXT:    br i1 [[TMP494]], label %[[PRED_STORE_IF323:.*]], label %[[PRED_STORE_CONTINUE324:.*]]
+; CHECK:       [[PRED_STORE_IF323]]:
+; CHECK-NEXT:    [[TMP495:%.*]] = add i64 [[INDEX]], 162
+; CHECK-NEXT:    [[TMP496:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP495]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP496]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE324]]
+; CHECK:       [[PRED_STORE_CONTINUE324]]:
+; CHECK-NEXT:    [[TMP497:%.*]] = extractelement <64 x i1> [[TMP2]], i32 35
+; CHECK-NEXT:    br i1 [[TMP497]], label %[[PRED_STORE_IF325:.*]], label %[[PRED_STORE_CONTINUE326:.*]]
+; CHECK:       [[PRED_STORE_IF325]]:
+; CHECK-NEXT:    [[TMP498:%.*]] = add i64 [[INDEX]], 163
+; CHECK-NEXT:    [[TMP499:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP498]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP499]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE326]]
+; CHECK:       [[PRED_STORE_CONTINUE326]]:
+; CHECK-NEXT:    [[TMP500:%.*]] = extractelement <64 x i1> [[TMP2]], i32 36
+; CHECK-NEXT:    br i1 [[TMP500]], label %[[PRED_STORE_IF327:.*]], label %[[PRED_STORE_CONTINUE328:.*]]
+; CHECK:       [[PRED_STORE_IF327]]:
+; CHECK-NEXT:    [[TMP501:%.*]] = add i64 [[INDEX]], 164
+; CHECK-NEXT:    [[TMP502:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP501]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP502]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE328]]
+; CHECK:       [[PRED_STORE_CONTINUE328]]:
+; CHECK-NEXT:    [[TMP503:%.*]] = extractelement <64 x i1> [[TMP2]], i32 37
+; CHECK-NEXT:    br i1 [[TMP503]], label %[[PRED_STORE_IF329:.*]], label %[[PRED_STORE_CONTINUE330:.*]]
+; CHECK:       [[PRED_STORE_IF329]]:
+; CHECK-NEXT:    [[TMP504:%.*]] = add i64 [[INDEX]], 165
+; CHECK-NEXT:    [[TMP505:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP504]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP505]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE330]]
+; CHECK:       [[PRED_STORE_CONTINUE330]]:
+; CHECK-NEXT:    [[TMP506:%.*]] = extractelement <64 x i1> [[TMP2]], i32 38
+; CHECK-NEXT:    br i1 [[TMP506]], label %[[PRED_STORE_IF331:.*]], label %[[PRED_STORE_CONTINUE332:.*]]
+; CHECK:       [[PRED_STORE_IF331]]:
+; CHECK-NEXT:    [[TMP507:%.*]] = add i64 [[INDEX]], 166
+; CHECK-NEXT:    [[TMP508:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP507]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP508]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE332]]
+; CHECK:       [[PRED_STORE_CONTINUE332]]:
+; CHECK-NEXT:    [[TMP509:%.*]] = extractelement <64 x i1> [[TMP2]], i32 39
+; CHECK-NEXT:    br i1 [[TMP509]], label %[[PRED_STORE_IF333:.*]], label %[[PRED_STORE_CONTINUE334:.*]]
+; CHECK:       [[PRED_STORE_IF333]]:
+; CHECK-NEXT:    [[TMP510:%.*]] = add i64 [[INDEX]], 167
+; CHECK-NEXT:    [[TMP511:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP510]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP511]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE334]]
+; CHECK:       [[PRED_STORE_CONTINUE334]]:
+; CHECK-NEXT:    [[TMP512:%.*]] = extractelement <64 x i1> [[TMP2]], i32 40
+; CHECK-NEXT:    br i1 [[TMP512]], label %[[PRED_STORE_IF335:.*]], label %[[PRED_STORE_CONTINUE336:.*]]
+; CHECK:       [[PRED_STORE_IF335]]:
+; CHECK-NEXT:    [[TMP513:%.*]] = add i64 [[INDEX]], 168
+; CHECK-NEXT:    [[TMP514:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP513]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP514]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE336]]
+; CHECK:       [[PRED_STORE_CONTINUE336]]:
+; CHECK-NEXT:    [[TMP515:%.*]] = extractelement <64 x i1> [[TMP2]], i32 41
+; CHECK-NEXT:    br i1 [[TMP515]], label %[[PRED_STORE_IF337:.*]], label %[[PRED_STORE_CONTINUE338:.*]]
+; CHECK:       [[PRED_STORE_IF337]]:
+; CHECK-NEXT:    [[TMP516:%.*]] = add i64 [[INDEX]], 169
+; CHECK-NEXT:    [[TMP517:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP516]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP517]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE338]]
+; CHECK:       [[PRED_STORE_CONTINUE338]]:
+; CHECK-NEXT:    [[TMP518:%.*]] = extractelement <64 x i1> [[TMP2]], i32 42
+; CHECK-NEXT:    br i1 [[TMP518]], label %[[PRED_STORE_IF339:.*]], label %[[PRED_STORE_CONTINUE340:.*]]
+; CHECK:       [[PRED_STORE_IF339]]:
+; CHECK-NEXT:    [[TMP519:%.*]] = add i64 [[INDEX]], 170
+; CHECK-NEXT:    [[TMP520:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP519]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP520]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE340]]
+; CHECK:       [[PRED_STORE_CONTINUE340]]:
+; CHECK-NEXT:    [[TMP521:%.*]] = extractelement <64 x i1> [[TMP2]], i32 43
+; CHECK-NEXT:    br i1 [[TMP521]], label %[[PRED_STORE_IF341:.*]], label %[[PRED_STORE_CONTINUE342:.*]]
+; CHECK:       [[PRED_STORE_IF341]]:
+; CHECK-NEXT:    [[TMP522:%.*]] = add i64 [[INDEX]], 171
+; CHECK-NEXT:    [[TMP523:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP522]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP523]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE342]]
+; CHECK:       [[PRED_STORE_CONTINUE342]]:
+; CHECK-NEXT:    [[TMP524:%.*]] = extractelement <64 x i1> [[TMP2]], i32 44
+; CHECK-NEXT:    br i1 [[TMP524]], label %[[PRED_STORE_IF343:.*]], label %[[PRED_STORE_CONTINUE344:.*]]
+; CHECK:       [[PRED_STORE_IF343]]:
+; CHECK-NEXT:    [[TMP525:%.*]] = add i64 [[INDEX]], 172
+; CHECK-NEXT:    [[TMP526:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP525]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP526]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE344]]
+; CHECK:       [[PRED_STORE_CONTINUE344]]:
+; CHECK-NEXT:    [[TMP527:%.*]] = extractelement <64 x i1> [[TMP2]], i32 45
+; CHECK-NEXT:    br i1 [[TMP527]], label %[[PRED_STORE_IF345:.*]], label %[[PRED_STORE_CONTINUE346:.*]]
+; CHECK:       [[PRED_STORE_IF345]]:
+; CHECK-NEXT:    [[TMP528:%.*]] = add i64 [[INDEX]], 173
+; CHECK-NEXT:    [[TMP529:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP528]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP529]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE346]]
+; CHECK:       [[PRED_STORE_CONTINUE346]]:
+; CHECK-NEXT:    [[TMP530:%.*]] = extractelement <64 x i1> [[TMP2]], i32 46
+; CHECK-NEXT:    br i1 [[TMP530]], label %[[PRED_STORE_IF347:.*]], label %[[PRED_STORE_CONTINUE348:.*]]
+; CHECK:       [[PRED_STORE_IF347]]:
+; CHECK-NEXT:    [[TMP531:%.*]] = add i64 [[INDEX]], 174
+; CHECK-NEXT:    [[TMP532:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP531]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP532]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE348]]
+; CHECK:       [[PRED_STORE_CONTINUE348]]:
+; CHECK-NEXT:    [[TMP533:%.*]] = extractelement <64 x i1> [[TMP2]], i32 47
+; CHECK-NEXT:    br i1 [[TMP533]], label %[[PRED_STORE_IF349:.*]], label %[[PRED_STORE_CONTINUE350:.*]]
+; CHECK:       [[PRED_STORE_IF349]]:
+; CHECK-NEXT:    [[TMP534:%.*]] = add i64 [[INDEX]], 175
+; CHECK-NEXT:    [[TMP535:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP534]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP535]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE350]]
+; CHECK:       [[PRED_STORE_CONTINUE350]]:
+; CHECK-NEXT:    [[TMP536:%.*]] = extractelement <64 x i1> [[TMP2]], i32 48
+; CHECK-NEXT:    br i1 [[TMP536]], label %[[PRED_STORE_IF351:.*]], label %[[PRED_STORE_CONTINUE352:.*]]
+; CHECK:       [[PRED_STORE_IF351]]:
+; CHECK-NEXT:    [[TMP537:%.*]] = add i64 [[INDEX]], 176
+; CHECK-NEXT:    [[TMP538:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP537]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP538]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE352]]
+; CHECK:       [[PRED_STORE_CONTINUE352]]:
+; CHECK-NEXT:    [[TMP539:%.*]] = extractelement <64 x i1> [[TMP2]], i32 49
+; CHECK-NEXT:    br i1 [[TMP539]], label %[[PRED_STORE_IF353:.*]], label %[[PRED_STORE_CONTINUE354:.*]]
+; CHECK:       [[PRED_STORE_IF353]]:
+; CHECK-NEXT:    [[TMP540:%.*]] = add i64 [[INDEX]], 177
+; CHECK-NEXT:    [[TMP541:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP540]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP541]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE354]]
+; CHECK:       [[PRED_STORE_CONTINUE354]]:
+; CHECK-NEXT:    [[TMP542:%.*]] = extractelement <64 x i1> [[TMP2]], i32 50
+; CHECK-NEXT:    br i1 [[TMP542]], label %[[PRED_STORE_IF355:.*]], label %[[PRED_STORE_CONTINUE356:.*]]
+; CHECK:       [[PRED_STORE_IF355]]:
+; CHECK-NEXT:    [[TMP543:%.*]] = add i64 [[INDEX]], 178
+; CHECK-NEXT:    [[TMP544:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP543]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP544]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE356]]
+; CHECK:       [[PRED_STORE_CONTINUE356]]:
+; CHECK-NEXT:    [[TMP545:%.*]] = extractelement <64 x i1> [[TMP2]], i32 51
+; CHECK-NEXT:    br i1 [[TMP545]], label %[[PRED_STORE_IF357:.*]], label %[[PRED_STORE_CONTINUE358:.*]]
+; CHECK:       [[PRED_STORE_IF357]]:
+; CHECK-NEXT:    [[TMP546:%.*]] = add i64 [[INDEX]], 179
+; CHECK-NEXT:    [[TMP547:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP546]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP547]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE358]]
+; CHECK:       [[PRED_STORE_CONTINUE358]]:
+; CHECK-NEXT:    [[TMP548:%.*]] = extractelement <64 x i1> [[TMP2]], i32 52
+; CHECK-NEXT:    br i1 [[TMP548]], label %[[PRED_STORE_IF359:.*]], label %[[PRED_STORE_CONTINUE360:.*]]
+; CHECK:       [[PRED_STORE_IF359]]:
+; CHECK-NEXT:    [[TMP549:%.*]] = add i64 [[INDEX]], 180
+; CHECK-NEXT:    [[TMP550:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP549]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP550]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE360]]
+; CHECK:       [[PRED_STORE_CONTINUE360]]:
+; CHECK-NEXT:    [[TMP551:%.*]] = extractelement <64 x i1> [[TMP2]], i32 53
+; CHECK-NEXT:    br i1 [[TMP551]], label %[[PRED_STORE_IF361:.*]], label %[[PRED_STORE_CONTINUE362:.*]]
+; CHECK:       [[PRED_STORE_IF361]]:
+; CHECK-NEXT:    [[TMP552:%.*]] = add i64 [[INDEX]], 181
+; CHECK-NEXT:    [[TMP553:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP552]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP553]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE362]]
+; CHECK:       [[PRED_STORE_CONTINUE362]]:
+; CHECK-NEXT:    [[TMP554:%.*]] = extractelement <64 x i1> [[TMP2]], i32 54
+; CHECK-NEXT:    br i1 [[TMP554]], label %[[PRED_STORE_IF363:.*]], label %[[PRED_STORE_CONTINUE364:.*]]
+; CHECK:       [[PRED_STORE_IF363]]:
+; CHECK-NEXT:    [[TMP555:%.*]] = add i64 [[INDEX]], 182
+; CHECK-NEXT:    [[TMP556:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP555]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP556]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE364]]
+; CHECK:       [[PRED_STORE_CONTINUE364]]:
+; CHECK-NEXT:    [[TMP557:%.*]] = extractelement <64 x i1> [[TMP2]], i32 55
+; CHECK-NEXT:    br i1 [[TMP557]], label %[[PRED_STORE_IF365:.*]], label %[[PRED_STORE_CONTINUE366:.*]]
+; CHECK:       [[PRED_STORE_IF365]]:
+; CHECK-NEXT:    [[TMP558:%.*]] = add i64 [[INDEX]], 183
+; CHECK-NEXT:    [[TMP559:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP558]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP559]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE366]]
+; CHECK:       [[PRED_STORE_CONTINUE366]]:
+; CHECK-NEXT:    [[TMP560:%.*]] = extractelement <64 x i1> [[TMP2]], i32 56
+; CHECK-NEXT:    br i1 [[TMP560]], label %[[PRED_STORE_IF367:.*]], label %[[PRED_STORE_CONTINUE368:.*]]
+; CHECK:       [[PRED_STORE_IF367]]:
+; CHECK-NEXT:    [[TMP561:%.*]] = add i64 [[INDEX]], 184
+; CHECK-NEXT:    [[TMP562:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP561]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP562]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE368]]
+; CHECK:       [[PRED_STORE_CONTINUE368]]:
+; CHECK-NEXT:    [[TMP563:%.*]] = extractelement <64 x i1> [[TMP2]], i32 57
+; CHECK-NEXT:    br i1 [[TMP563]], label %[[PRED_STORE_IF369:.*]], label %[[PRED_STORE_CONTINUE370:.*]]
+; CHECK:       [[PRED_STORE_IF369]]:
+; CHECK-NEXT:    [[TMP564:%.*]] = add i64 [[INDEX]], 185
+; CHECK-NEXT:    [[TMP565:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP564]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP565]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE370]]
+; CHECK:       [[PRED_STORE_CONTINUE370]]:
+; CHECK-NEXT:    [[TMP566:%.*]] = extractelement <64 x i1> [[TMP2]], i32 58
+; CHECK-NEXT:    br i1 [[TMP566]], label %[[PRED_STORE_IF371:.*]], label %[[PRED_STORE_CONTINUE372:.*]]
+; CHECK:       [[PRED_STORE_IF371]]:
+; CHECK-NEXT:    [[TMP567:%.*]] = add i64 [[INDEX]], 186
+; CHECK-NEXT:    [[TMP568:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP567]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP568]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE372]]
+; CHECK:       [[PRED_STORE_CONTINUE372]]:
+; CHECK-NEXT:    [[TMP569:%.*]] = extractelement <64 x i1> [[TMP2]], i32 59
+; CHECK-NEXT:    br i1 [[TMP569]], label %[[PRED_STORE_IF373:.*]], label %[[PRED_STORE_CONTINUE374:.*]]
+; CHECK:       [[PRED_STORE_IF373]]:
+; CHECK-NEXT:    [[TMP570:%.*]] = add i64 [[INDEX]], 187
+; CHECK-NEXT:    [[TMP571:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP570]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP571]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE374]]
+; CHECK:       [[PRED_STORE_CONTINUE374]]:
+; CHECK-NEXT:    [[TMP572:%.*]] = extractelement <64 x i1> [[TMP2]], i32 60
+; CHECK-NEXT:    br i1 [[TMP572]], label %[[PRED_STORE_IF375:.*]], label %[[PRED_STORE_CONTINUE376:.*]]
+; CHECK:       [[PRED_STORE_IF375]]:
+; CHECK-NEXT:    [[TMP573:%.*]] = add i64 [[INDEX]], 188
+; CHECK-NEXT:    [[TMP574:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP573]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP574]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE376]]
+; CHECK:       [[PRED_STORE_CONTINUE376]]:
+; CHECK-NEXT:    [[TMP575:%.*]] = extractelement <64 x i1> [[TMP2]], i32 61
+; CHECK-NEXT:    br i1 [[TMP575]], label %[[PRED_STORE_IF377:.*]], label %[[PRED_STORE_CONTINUE378:.*]]
+; CHECK:       [[PRED_STORE_IF377]]:
+; CHECK-NEXT:    [[TMP576:%.*]] = add i64 [[INDEX]], 189
+; CHECK-NEXT:    [[TMP577:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP576]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP577]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE378]]
+; CHECK:       [[PRED_STORE_CONTINUE378]]:
+; CHECK-NEXT:    [[TMP578:%.*]] = extractelement <64 x i1> [[TMP2]], i32 62
+; CHECK-NEXT:    br i1 [[TMP578]], label %[[PRED_STORE_IF379:.*]], label %[[PRED_STORE_CONTINUE380:.*]]
+; CHECK:       [[PRED_STORE_IF379]]:
+; CHECK-NEXT:    [[TMP579:%.*]] = add i64 [[INDEX]], 190
+; CHECK-NEXT:    [[TMP580:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP579]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP580]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE380]]
+; CHECK:       [[PRED_STORE_CONTINUE380]]:
+; CHECK-NEXT:    [[TMP581:%.*]] = extractelement <64 x i1> [[TMP2]], i32 63
+; CHECK-NEXT:    br i1 [[TMP581]], label %[[PRED_STORE_IF381:.*]], label %[[PRED_STORE_CONTINUE382:.*]]
+; CHECK:       [[PRED_STORE_IF381]]:
+; CHECK-NEXT:    [[TMP582:%.*]] = add i64 [[INDEX]], 191
+; CHECK-NEXT:    [[TMP583:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP582]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP583]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE382]]
+; CHECK:       [[PRED_STORE_CONTINUE382]]:
+; CHECK-NEXT:    [[TMP584:%.*]] = extractelement <64 x i1> [[TMP3]], i32 0
+; CHECK-NEXT:    br i1 [[TMP584]], label %[[PRED_STORE_IF383:.*]], label %[[PRED_STORE_CONTINUE384:.*]]
+; CHECK:       [[PRED_STORE_IF383]]:
+; CHECK-NEXT:    [[TMP585:%.*]] = add i64 [[INDEX]], 192
+; CHECK-NEXT:    [[TMP586:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP585]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP586]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE384]]
+; CHECK:       [[PRED_STORE_CONTINUE384]]:
+; CHECK-NEXT:    [[TMP587:%.*]] = extractelement <64 x i1> [[TMP3]], i32 1
+; CHECK-NEXT:    br i1 [[TMP587]], label %[[PRED_STORE_IF385:.*]], label %[[PRED_STORE_CONTINUE386:.*]]
+; CHECK:       [[PRED_STORE_IF385]]:
+; CHECK-NEXT:    [[TMP588:%.*]] = add i64 [[INDEX]], 193
+; CHECK-NEXT:    [[TMP589:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP588]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP589]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE386]]
+; CHECK:       [[PRED_STORE_CONTINUE386]]:
+; CHECK-NEXT:    [[TMP590:%.*]] = extractelement <64 x i1> [[TMP3]], i32 2
+; CHECK-NEXT:    br i1 [[TMP590]], label %[[PRED_STORE_IF387:.*]], label %[[PRED_STORE_CONTINUE388:.*]]
+; CHECK:       [[PRED_STORE_IF387]]:
+; CHECK-NEXT:    [[TMP591:%.*]] = add i64 [[INDEX]], 194
+; CHECK-NEXT:    [[TMP592:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP591]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP592]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE388]]
+; CHECK:       [[PRED_STORE_CONTINUE388]]:
+; CHECK-NEXT:    [[TMP593:%.*]] = extractelement <64 x i1> [[TMP3]], i32 3
+; CHECK-NEXT:    br i1 [[TMP593]], label %[[PRED_STORE_IF389:.*]], label %[[PRED_STORE_CONTINUE390:.*]]
+; CHECK:       [[PRED_STORE_IF389]]:
+; CHECK-NEXT:    [[TMP594:%.*]] = add i64 [[INDEX]], 195
+; CHECK-NEXT:    [[TMP595:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP594]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP595]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE390]]
+; CHECK:       [[PRED_STORE_CONTINUE390]]:
+; CHECK-NEXT:    [[TMP596:%.*]] = extractelement <64 x i1> [[TMP3]], i32 4
+; CHECK-NEXT:    br i1 [[TMP596]], label %[[PRED_STORE_IF391:.*]], label %[[PRED_STORE_CONTINUE392:.*]]
+; CHECK:       [[PRED_STORE_IF391]]:
+; CHECK-NEXT:    [[TMP597:%.*]] = add i64 [[INDEX]], 196
+; CHECK-NEXT:    [[TMP598:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP597]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP598]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE392]]
+; CHECK:       [[PRED_STORE_CONTINUE392]]:
+; CHECK-NEXT:    [[TMP599:%.*]] = extractelement <64 x i1> [[TMP3]], i32 5
+; CHECK-NEXT:    br i1 [[TMP599]], label %[[PRED_STORE_IF393:.*]], label %[[PRED_STORE_CONTINUE394:.*]]
+; CHECK:       [[PRED_STORE_IF393]]:
+; CHECK-NEXT:    [[TMP600:%.*]] = add i64 [[INDEX]], 197
+; CHECK-NEXT:    [[TMP601:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP600]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP601]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE394]]
+; CHECK:       [[PRED_STORE_CONTINUE394]]:
+; CHECK-NEXT:    [[TMP602:%.*]] = extractelement <64 x i1> [[TMP3]], i32 6
+; CHECK-NEXT:    br i1 [[TMP602]], label %[[PRED_STORE_IF395:.*]], label %[[PRED_STORE_CONTINUE396:.*]]
+; CHECK:       [[PRED_STORE_IF395]]:
+; CHECK-NEXT:    [[TMP603:%.*]] = add i64 [[INDEX]], 198
+; CHECK-NEXT:    [[TMP604:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP603]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP604]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE396]]
+; CHECK:       [[PRED_STORE_CONTINUE396]]:
+; CHECK-NEXT:    [[TMP605:%.*]] = extractelement <64 x i1> [[TMP3]], i32 7
+; CHECK-NEXT:    br i1 [[TMP605]], label %[[PRED_STORE_IF397:.*]], label %[[PRED_STORE_CONTINUE398:.*]]
+; CHECK:       [[PRED_STORE_IF397]]:
+; CHECK-NEXT:    [[TMP606:%.*]] = add i64 [[INDEX]], 199
+; CHECK-NEXT:    [[TMP607:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP606]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP607]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE398]]
+; CHECK:       [[PRED_STORE_CONTINUE398]]:
+; CHECK-NEXT:    [[TMP608:%.*]] = extractelement <64 x i1> [[TMP3]], i32 8
+; CHECK-NEXT:    br i1 [[TMP608]], label %[[PRED_STORE_IF399:.*]], label %[[PRED_STORE_CONTINUE400:.*]]
+; CHECK:       [[PRED_STORE_IF399]]:
+; CHECK-NEXT:    [[TMP609:%.*]] = add i64 [[INDEX]], 200
+; CHECK-NEXT:    [[TMP610:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP609]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP610]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE400]]
+; CHECK:       [[PRED_STORE_CONTINUE400]]:
+; CHECK-NEXT:    [[TMP611:%.*]] = extractelement <64 x i1> [[TMP3]], i32 9
+; CHECK-NEXT:    br i1 [[TMP611]], label %[[PRED_STORE_IF401:.*]], label %[[PRED_STORE_CONTINUE402:.*]]
+; CHECK:       [[PRED_STORE_IF401]]:
+; CHECK-NEXT:    [[TMP612:%.*]] = add i64 [[INDEX]], 201
+; CHECK-NEXT:    [[TMP613:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP612]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP613]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE402]]
+; CHECK:       [[PRED_STORE_CONTINUE402]]:
+; CHECK-NEXT:    [[TMP614:%.*]] = extractelement <64 x i1> [[TMP3]], i32 10
+; CHECK-NEXT:    br i1 [[TMP614]], label %[[PRED_STORE_IF403:.*]], label %[[PRED_STORE_CONTINUE404:.*]]
+; CHECK:       [[PRED_STORE_IF403]]:
+; CHECK-NEXT:    [[TMP615:%.*]] = add i64 [[INDEX]], 202
+; CHECK-NEXT:    [[TMP616:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP615]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP616]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE404]]
+; CHECK:       [[PRED_STORE_CONTINUE404]]:
+; CHECK-NEXT:    [[TMP617:%.*]] = extractelement <64 x i1> [[TMP3]], i32 11
+; CHECK-NEXT:    br i1 [[TMP617]], label %[[PRED_STORE_IF405:.*]], label %[[PRED_STORE_CONTINUE406:.*]]
+; CHECK:       [[PRED_STORE_IF405]]:
+; CHECK-NEXT:    [[TMP618:%.*]] = add i64 [[INDEX]], 203
+; CHECK-NEXT:    [[TMP619:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP618]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP619]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE406]]
+; CHECK:       [[PRED_STORE_CONTINUE406]]:
+; CHECK-NEXT:    [[TMP620:%.*]] = extractelement <64 x i1> [[TMP3]], i32 12
+; CHECK-NEXT:    br i1 [[TMP620]], label %[[PRED_STORE_IF407:.*]], label %[[PRED_STORE_CONTINUE408:.*]]
+; CHECK:       [[PRED_STORE_IF407]]:
+; CHECK-NEXT:    [[TMP621:%.*]] = add i64 [[INDEX]], 204
+; CHECK-NEXT:    [[TMP622:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP621]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP622]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE408]]
+; CHECK:       [[PRED_STORE_CONTINUE408]]:
+; CHECK-NEXT:    [[TMP623:%.*]] = extractelement <64 x i1> [[TMP3]], i32 13
+; CHECK-NEXT:    br i1 [[TMP623]], label %[[PRED_STORE_IF409:.*]], label %[[PRED_STORE_CONTINUE410:.*]]
+; CHECK:       [[PRED_STORE_IF409]]:
+; CHECK-NEXT:    [[TMP624:%.*]] = add i64 [[INDEX]], 205
+; CHECK-NEXT:    [[TMP625:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP624]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP625]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE410]]
+; CHECK:       [[PRED_STORE_CONTINUE410]]:
+; CHECK-NEXT:    [[TMP626:%.*]] = extractelement <64 x i1> [[TMP3]], i32 14
+; CHECK-NEXT:    br i1 [[TMP626]], label %[[PRED_STORE_IF411:.*]], label %[[PRED_STORE_CONTINUE412:.*]]
+; CHECK:       [[PRED_STORE_IF411]]:
+; CHECK-NEXT:    [[TMP627:%.*]] = add i64 [[INDEX]], 206
+; CHECK-NEXT:    [[TMP628:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP627]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP628]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE412]]
+; CHECK:       [[PRED_STORE_CONTINUE412]]:
+; CHECK-NEXT:    [[TMP629:%.*]] = extractelement <64 x i1> [[TMP3]], i32 15
+; CHECK-NEXT:    br i1 [[TMP629]], label %[[PRED_STORE_IF413:.*]], label %[[PRED_STORE_CONTINUE414:.*]]
+; CHECK:       [[PRED_STORE_IF413]]:
+; CHECK-NEXT:    [[TMP630:%.*]] = add i64 [[INDEX]], 207
+; CHECK-NEXT:    [[TMP631:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP630]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP631]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE414]]
+; CHECK:       [[PRED_STORE_CONTINUE414]]:
+; CHECK-NEXT:    [[TMP632:%.*]] = extractelement <64 x i1> [[TMP3]], i32 16
+; CHECK-NEXT:    br i1 [[TMP632]], label %[[PRED_STORE_IF415:.*]], label %[[PRED_STORE_CONTINUE416:.*]]
+; CHECK:       [[PRED_STORE_IF415]]:
+; CHECK-NEXT:    [[TMP633:%.*]] = add i64 [[INDEX]], 208
+; CHECK-NEXT:    [[TMP634:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP633]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP634]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE416]]
+; CHECK:       [[PRED_STORE_CONTINUE416]]:
+; CHECK-NEXT:    [[TMP635:%.*]] = extractelement <64 x i1> [[TMP3]], i32 17
+; CHECK-NEXT:    br i1 [[TMP635]], label %[[PRED_STORE_IF417:.*]], label %[[PRED_STORE_CONTINUE418:.*]]
+; CHECK:       [[PRED_STORE_IF417]]:
+; CHECK-NEXT:    [[TMP636:%.*]] = add i64 [[INDEX]], 209
+; CHECK-NEXT:    [[TMP637:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP636]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP637]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE418]]
+; CHECK:       [[PRED_STORE_CONTINUE418]]:
+; CHECK-NEXT:    [[TMP638:%.*]] = extractelement <64 x i1> [[TMP3]], i32 18
+; CHECK-NEXT:    br i1 [[TMP638]], label %[[PRED_STORE_IF419:.*]], label %[[PRED_STORE_CONTINUE420:.*]]
+; CHECK:       [[PRED_STORE_IF419]]:
+; CHECK-NEXT:    [[TMP639:%.*]] = add i64 [[INDEX]], 210
+; CHECK-NEXT:    [[TMP640:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP639]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP640]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE420]]
+; CHECK:       [[PRED_STORE_CONTINUE420]]:
+; CHECK-NEXT:    [[TMP641:%.*]] = extractelement <64 x i1> [[TMP3]], i32 19
+; CHECK-NEXT:    br i1 [[TMP641]], label %[[PRED_STORE_IF421:.*]], label %[[PRED_STORE_CONTINUE422:.*]]
+; CHECK:       [[PRED_STORE_IF421]]:
+; CHECK-NEXT:    [[TMP642:%.*]] = add i64 [[INDEX]], 211
+; CHECK-NEXT:    [[TMP643:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP642]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP643]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE422]]
+; CHECK:       [[PRED_STORE_CONTINUE422]]:
+; CHECK-NEXT:    [[TMP644:%.*]] = extractelement <64 x i1> [[TMP3]], i32 20
+; CHECK-NEXT:    br i1 [[TMP644]], label %[[PRED_STORE_IF423:.*]], label %[[PRED_STORE_CONTINUE424:.*]]
+; CHECK:       [[PRED_STORE_IF423]]:
+; CHECK-NEXT:    [[TMP645:%.*]] = add i64 [[INDEX]], 212
+; CHECK-NEXT:    [[TMP646:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP645]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP646]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE424]]
+; CHECK:       [[PRED_STORE_CONTINUE424]]:
+; CHECK-NEXT:    [[TMP647:%.*]] = extractelement <64 x i1> [[TMP3]], i32 21
+; CHECK-NEXT:    br i1 [[TMP647]], label %[[PRED_STORE_IF425:.*]], label %[[PRED_STORE_CONTINUE426:.*]]
+; CHECK:       [[PRED_STORE_IF425]]:
+; CHECK-NEXT:    [[TMP648:%.*]] = add i64 [[INDEX]], 213
+; CHECK-NEXT:    [[TMP649:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP648]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP649]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE426]]
+; CHECK:       [[PRED_STORE_CONTINUE426]]:
+; CHECK-NEXT:    [[TMP650:%.*]] = extractelement <64 x i1> [[TMP3]], i32 22
+; CHECK-NEXT:    br i1 [[TMP650]], label %[[PRED_STORE_IF427:.*]], label %[[PRED_STORE_CONTINUE428:.*]]
+; CHECK:       [[PRED_STORE_IF427]]:
+; CHECK-NEXT:    [[TMP651:%.*]] = add i64 [[INDEX]], 214
+; CHECK-NEXT:    [[TMP652:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP651]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP652]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE428]]
+; CHECK:       [[PRED_STORE_CONTINUE428]]:
+; CHECK-NEXT:    [[TMP653:%.*]] = extractelement <64 x i1> [[TMP3]], i32 23
+; CHECK-NEXT:    br i1 [[TMP653]], label %[[PRED_STORE_IF429:.*]], label %[[PRED_STORE_CONTINUE430:.*]]
+; CHECK:       [[PRED_STORE_IF429]]:
+; CHECK-NEXT:    [[TMP654:%.*]] = add i64 [[INDEX]], 215
+; CHECK-NEXT:    [[TMP655:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP654]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP655]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE430]]
+; CHECK:       [[PRED_STORE_CONTINUE430]]:
+; CHECK-NEXT:    [[TMP656:%.*]] = extractelement <64 x i1> [[TMP3]], i32 24
+; CHECK-NEXT:    br i1 [[TMP656]], label %[[PRED_STORE_IF431:.*]], label %[[PRED_STORE_CONTINUE432:.*]]
+; CHECK:       [[PRED_STORE_IF431]]:
+; CHECK-NEXT:    [[TMP657:%.*]] = add i64 [[INDEX]], 216
+; CHECK-NEXT:    [[TMP658:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP657]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP658]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE432]]
+; CHECK:       [[PRED_STORE_CONTINUE432]]:
+; CHECK-NEXT:    [[TMP659:%.*]] = extractelement <64 x i1> [[TMP3]], i32 25
+; CHECK-NEXT:    br i1 [[TMP659]], label %[[PRED_STORE_IF433:.*]], label %[[PRED_STORE_CONTINUE434:.*]]
+; CHECK:       [[PRED_STORE_IF433]]:
+; CHECK-NEXT:    [[TMP660:%.*]] = add i64 [[INDEX]], 217
+; CHECK-NEXT:    [[TMP661:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP660]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP661]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE434]]
+; CHECK:       [[PRED_STORE_CONTINUE434]]:
+; CHECK-NEXT:    [[TMP662:%.*]] = extractelement <64 x i1> [[TMP3]], i32 26
+; CHECK-NEXT:    br i1 [[TMP662]], label %[[PRED_STORE_IF435:.*]], label %[[PRED_STORE_CONTINUE436:.*]]
+; CHECK:       [[PRED_STORE_IF435]]:
+; CHECK-NEXT:    [[TMP663:%.*]] = add i64 [[INDEX]], 218
+; CHECK-NEXT:    [[TMP664:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP663]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP664]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE436]]
+; CHECK:       [[PRED_STORE_CONTINUE436]]:
+; CHECK-NEXT:    [[TMP665:%.*]] = extractelement <64 x i1> [[TMP3]], i32 27
+; CHECK-NEXT:    br i1 [[TMP665]], label %[[PRED_STORE_IF437:.*]], label %[[PRED_STORE_CONTINUE438:.*]]
+; CHECK:       [[PRED_STORE_IF437]]:
+; CHECK-NEXT:    [[TMP666:%.*]] = add i64 [[INDEX]], 219
+; CHECK-NEXT:    [[TMP667:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP666]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP667]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE438]]
+; CHECK:       [[PRED_STORE_CONTINUE438]]:
+; CHECK-NEXT:    [[TMP668:%.*]] = extractelement <64 x i1> [[TMP3]], i32 28
+; CHECK-NEXT:    br i1 [[TMP668]], label %[[PRED_STORE_IF439:.*]], label %[[PRED_STORE_CONTINUE440:.*]]
+; CHECK:       [[PRED_STORE_IF439]]:
+; CHECK-NEXT:    [[TMP669:%.*]] = add i64 [[INDEX]], 220
+; CHECK-NEXT:    [[TMP670:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP669]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP670]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE440]]
+; CHECK:       [[PRED_STORE_CONTINUE440]]:
+; CHECK-NEXT:    [[TMP671:%.*]] = extractelement <64 x i1> [[TMP3]], i32 29
+; CHECK-NEXT:    br i1 [[TMP671]], label %[[PRED_STORE_IF441:.*]], label %[[PRED_STORE_CONTINUE442:.*]]
+; CHECK:       [[PRED_STORE_IF441]]:
+; CHECK-NEXT:    [[TMP672:%.*]] = add i64 [[INDEX]], 221
+; CHECK-NEXT:    [[TMP673:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP672]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP673]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE442]]
+; CHECK:       [[PRED_STORE_CONTINUE442]]:
+; CHECK-NEXT:    [[TMP674:%.*]] = extractelement <64 x i1> [[TMP3]], i32 30
+; CHECK-NEXT:    br i1 [[TMP674]], label %[[PRED_STORE_IF443:.*]], label %[[PRED_STORE_CONTINUE444:.*]]
+; CHECK:       [[PRED_STORE_IF443]]:
+; CHECK-NEXT:    [[TMP675:%.*]] = add i64 [[INDEX]], 222
+; CHECK-NEXT:    [[TMP676:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP675]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP676]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE444]]
+; CHECK:       [[PRED_STORE_CONTINUE444]]:
+; CHECK-NEXT:    [[TMP677:%.*]] = extractelement <64 x i1> [[TMP3]], i32 31
+; CHECK-NEXT:    br i1 [[TMP677]], label %[[PRED_STORE_IF445:.*]], label %[[PRED_STORE_CONTINUE446:.*]]
+; CHECK:       [[PRED_STORE_IF445]]:
+; CHECK-NEXT:    [[TMP678:%.*]] = add i64 [[INDEX]], 223
+; CHECK-NEXT:    [[TMP679:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP678]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP679]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE446]]
+; CHECK:       [[PRED_STORE_CONTINUE446]]:
+; CHECK-NEXT:    [[TMP680:%.*]] = extractelement <64 x i1> [[TMP3]], i32 32
+; CHECK-NEXT:    br i1 [[TMP680]], label %[[PRED_STORE_IF447:.*]], label %[[PRED_STORE_CONTINUE448:.*]]
+; CHECK:       [[PRED_STORE_IF447]]:
+; CHECK-NEXT:    [[TMP681:%.*]] = add i64 [[INDEX]], 224
+; CHECK-NEXT:    [[TMP682:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP681]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP682]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE448]]
+; CHECK:       [[PRED_STORE_CONTINUE448]]:
+; CHECK-NEXT:    [[TMP683:%.*]] = extractelement <64 x i1> [[TMP3]], i32 33
+; CHECK-NEXT:    br i1 [[TMP683]], label %[[PRED_STORE_IF449:.*]], label %[[PRED_STORE_CONTINUE450:.*]]
+; CHECK:       [[PRED_STORE_IF449]]:
+; CHECK-NEXT:    [[TMP684:%.*]] = add i64 [[INDEX]], 225
+; CHECK-NEXT:    [[TMP685:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP684]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP685]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE450]]
+; CHECK:       [[PRED_STORE_CONTINUE450]]:
+; CHECK-NEXT:    [[TMP686:%.*]] = extractelement <64 x i1> [[TMP3]], i32 34
+; CHECK-NEXT:    br i1 [[TMP686]], label %[[PRED_STORE_IF451:.*]], label %[[PRED_STORE_CONTINUE452:.*]]
+; CHECK:       [[PRED_STORE_IF451]]:
+; CHECK-NEXT:    [[TMP687:%.*]] = add i64 [[INDEX]], 226
+; CHECK-NEXT:    [[TMP688:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP687]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP688]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE452]]
+; CHECK:       [[PRED_STORE_CONTINUE452]]:
+; CHECK-NEXT:    [[TMP689:%.*]] = extractelement <64 x i1> [[TMP3]], i32 35
+; CHECK-NEXT:    br i1 [[TMP689]], label %[[PRED_STORE_IF453:.*]], label %[[PRED_STORE_CONTINUE454:.*]]
+; CHECK:       [[PRED_STORE_IF453]]:
+; CHECK-NEXT:    [[TMP690:%.*]] = add i64 [[INDEX]], 227
+; CHECK-NEXT:    [[TMP691:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP690]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP691]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE454]]
+; CHECK:       [[PRED_STORE_CONTINUE454]]:
+; CHECK-NEXT:    [[TMP692:%.*]] = extractelement <64 x i1> [[TMP3]], i32 36
+; CHECK-NEXT:    br i1 [[TMP692]], label %[[PRED_STORE_IF455:.*]], label %[[PRED_STORE_CONTINUE456:.*]]
+; CHECK:       [[PRED_STORE_IF455]]:
+; CHECK-NEXT:    [[TMP693:%.*]] = add i64 [[INDEX]], 228
+; CHECK-NEXT:    [[TMP694:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP693]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP694]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE456]]
+; CHECK:       [[PRED_STORE_CONTINUE456]]:
+; CHECK-NEXT:    [[TMP695:%.*]] = extractelement <64 x i1> [[TMP3]], i32 37
+; CHECK-NEXT:    br i1 [[TMP695]], label %[[PRED_STORE_IF457:.*]], label %[[PRED_STORE_CONTINUE458:.*]]
+; CHECK:       [[PRED_STORE_IF457]]:
+; CHECK-NEXT:    [[TMP696:%.*]] = add i64 [[INDEX]], 229
+; CHECK-NEXT:    [[TMP697:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP696]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP697]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE458]]
+; CHECK:       [[PRED_STORE_CONTINUE458]]:
+; CHECK-NEXT:    [[TMP698:%.*]] = extractelement <64 x i1> [[TMP3]], i32 38
+; CHECK-NEXT:    br i1 [[TMP698]], label %[[PRED_STORE_IF459:.*]], label %[[PRED_STORE_CONTINUE460:.*]]
+; CHECK:       [[PRED_STORE_IF459]]:
+; CHECK-NEXT:    [[TMP699:%.*]] = add i64 [[INDEX]], 230
+; CHECK-NEXT:    [[TMP700:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP699]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP700]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE460]]
+; CHECK:       [[PRED_STORE_CONTINUE460]]:
+; CHECK-NEXT:    [[TMP701:%.*]] = extractelement <64 x i1> [[TMP3]], i32 39
+; CHECK-NEXT:    br i1 [[TMP701]], label %[[PRED_STORE_IF461:.*]], label %[[PRED_STORE_CONTINUE462:.*]]
+; CHECK:       [[PRED_STORE_IF461]]:
+; CHECK-NEXT:    [[TMP702:%.*]] = add i64 [[INDEX]], 231
+; CHECK-NEXT:    [[TMP703:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP702]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP703]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE462]]
+; CHECK:       [[PRED_STORE_CONTINUE462]]:
+; CHECK-NEXT:    [[TMP704:%.*]] = extractelement <64 x i1> [[TMP3]], i32 40
+; CHECK-NEXT:    br i1 [[TMP704]], label %[[PRED_STORE_IF463:.*]], label %[[PRED_STORE_CONTINUE464:.*]]
+; CHECK:       [[PRED_STORE_IF463]]:
+; CHECK-NEXT:    [[TMP705:%.*]] = add i64 [[INDEX]], 232
+; CHECK-NEXT:    [[TMP706:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP705]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP706]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE464]]
+; CHECK:       [[PRED_STORE_CONTINUE464]]:
+; CHECK-NEXT:    [[TMP707:%.*]] = extractelement <64 x i1> [[TMP3]], i32 41
+; CHECK-NEXT:    br i1 [[TMP707]], label %[[PRED_STORE_IF465:.*]], label %[[PRED_STORE_CONTINUE466:.*]]
+; CHECK:       [[PRED_STORE_IF465]]:
+; CHECK-NEXT:    [[TMP708:%.*]] = add i64 [[INDEX]], 233
+; CHECK-NEXT:    [[TMP709:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP708]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP709]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE466]]
+; CHECK:       [[PRED_STORE_CONTINUE466]]:
+; CHECK-NEXT:    [[TMP710:%.*]] = extractelement <64 x i1> [[TMP3]], i32 42
+; CHECK-NEXT:    br i1 [[TMP710]], label %[[PRED_STORE_IF467:.*]], label %[[PRED_STORE_CONTINUE468:.*]]
+; CHECK:       [[PRED_STORE_IF467]]:
+; CHECK-NEXT:    [[TMP711:%.*]] = add i64 [[INDEX]], 234
+; CHECK-NEXT:    [[TMP712:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP711]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP712]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE468]]
+; CHECK:       [[PRED_STORE_CONTINUE468]]:
+; CHECK-NEXT:    [[TMP713:%.*]] = extractelement <64 x i1> [[TMP3]], i32 43
+; CHECK-NEXT:    br i1 [[TMP713]], label %[[PRED_STORE_IF469:.*]], label %[[PRED_STORE_CONTINUE470:.*]]
+; CHECK:       [[PRED_STORE_IF469]]:
+; CHECK-NEXT:    [[TMP714:%.*]] = add i64 [[INDEX]], 235
+; CHECK-NEXT:    [[TMP715:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP714]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP715]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE470]]
+; CHECK:       [[PRED_STORE_CONTINUE470]]:
+; CHECK-NEXT:    [[TMP716:%.*]] = extractelement <64 x i1> [[TMP3]], i32 44
+; CHECK-NEXT:    br i1 [[TMP716]], label %[[PRED_STORE_IF471:.*]], label %[[PRED_STORE_CONTINUE472:.*]]
+; CHECK:       [[PRED_STORE_IF471]]:
+; CHECK-NEXT:    [[TMP717:%.*]] = add i64 [[INDEX]], 236
+; CHECK-NEXT:    [[TMP718:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP717]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP718]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE472]]
+; CHECK:       [[PRED_STORE_CONTINUE472]]:
+; CHECK-NEXT:    [[TMP719:%.*]] = extractelement <64 x i1> [[TMP3]], i32 45
+; CHECK-NEXT:    br i1 [[TMP719]], label %[[PRED_STORE_IF473:.*]], label %[[PRED_STORE_CONTINUE474:.*]]
+; CHECK:       [[PRED_STORE_IF473]]:
+; CHECK-NEXT:    [[TMP720:%.*]] = add i64 [[INDEX]], 237
+; CHECK-NEXT:    [[TMP721:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP720]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP721]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE474]]
+; CHECK:       [[PRED_STORE_CONTINUE474]]:
+; CHECK-NEXT:    [[TMP722:%.*]] = extractelement <64 x i1> [[TMP3]], i32 46
+; CHECK-NEXT:    br i1 [[TMP722]], label %[[PRED_STORE_IF475:.*]], label %[[PRED_STORE_CONTINUE476:.*]]
+; CHECK:       [[PRED_STORE_IF475]]:
+; CHECK-NEXT:    [[TMP723:%.*]] = add i64 [[INDEX]], 238
+; CHECK-NEXT:    [[TMP724:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP723]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP724]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE476]]
+; CHECK:       [[PRED_STORE_CONTINUE476]]:
+; CHECK-NEXT:    [[TMP725:%.*]] = extractelement <64 x i1> [[TMP3]], i32 47
+; CHECK-NEXT:    br i1 [[TMP725]], label %[[PRED_STORE_IF477:.*]], label %[[PRED_STORE_CONTINUE478:.*]]
+; CHECK:       [[PRED_STORE_IF477]]:
+; CHECK-NEXT:    [[TMP726:%.*]] = add i64 [[INDEX]], 239
+; CHECK-NEXT:    [[TMP727:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP726]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP727]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE478]]
+; CHECK:       [[PRED_STORE_CONTINUE478]]:
+; CHECK-NEXT:    [[TMP728:%.*]] = extractelement <64 x i1> [[TMP3]], i32 48
+; CHECK-NEXT:    br i1 [[TMP728]], label %[[PRED_STORE_IF479:.*]], label %[[PRED_STORE_CONTINUE480:.*]]
+; CHECK:       [[PRED_STORE_IF479]]:
+; CHECK-NEXT:    [[TMP729:%.*]] = add i64 [[INDEX]], 240
+; CHECK-NEXT:    [[TMP730:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP729]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP730]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE480]]
+; CHECK:       [[PRED_STORE_CONTINUE480]]:
+; CHECK-NEXT:    [[TMP731:%.*]] = extractelement <64 x i1> [[TMP3]], i32 49
+; CHECK-NEXT:    br i1 [[TMP731]], label %[[PRED_STORE_IF481:.*]], label %[[PRED_STORE_CONTINUE482:.*]]
+; CHECK:       [[PRED_STORE_IF481]]:
+; CHECK-NEXT:    [[TMP732:%.*]] = add i64 [[INDEX]], 241
+; CHECK-NEXT:    [[TMP733:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP732]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP733]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE482]]
+; CHECK:       [[PRED_STORE_CONTINUE482]]:
+; CHECK-NEXT:    [[TMP734:%.*]] = extractelement <64 x i1> [[TMP3]], i32 50
+; CHECK-NEXT:    br i1 [[TMP734]], label %[[PRED_STORE_IF483:.*]], label %[[PRED_STORE_CONTINUE484:.*]]
+; CHECK:       [[PRED_STORE_IF483]]:
+; CHECK-NEXT:    [[TMP735:%.*]] = add i64 [[INDEX]], 242
+; CHECK-NEXT:    [[TMP736:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP735]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP736]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE484]]
+; CHECK:       [[PRED_STORE_CONTINUE484]]:
+; CHECK-NEXT:    [[TMP737:%.*]] = extractelement <64 x i1> [[TMP3]], i32 51
+; CHECK-NEXT:    br i1 [[TMP737]], label %[[PRED_STORE_IF485:.*]], label %[[PRED_STORE_CONTINUE486:.*]]
+; CHECK:       [[PRED_STORE_IF485]]:
+; CHECK-NEXT:    [[TMP738:%.*]] = add i64 [[INDEX]], 243
+; CHECK-NEXT:    [[TMP739:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP738]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP739]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE486]]
+; CHECK:       [[PRED_STORE_CONTINUE486]]:
+; CHECK-NEXT:    [[TMP740:%.*]] = extractelement <64 x i1> [[TMP3]], i32 52
+; CHECK-NEXT:    br i1 [[TMP740]], label %[[PRED_STORE_IF487:.*]], label %[[PRED_STORE_CONTINUE488:.*]]
+; CHECK:       [[PRED_STORE_IF487]]:
+; CHECK-NEXT:    [[TMP741:%.*]] = add i64 [[INDEX]], 244
+; CHECK-NEXT:    [[TMP742:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP741]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP742]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE488]]
+; CHECK:       [[PRED_STORE_CONTINUE488]]:
+; CHECK-NEXT:    [[TMP743:%.*]] = extractelement <64 x i1> [[TMP3]], i32 53
+; CHECK-NEXT:    br i1 [[TMP743]], label %[[PRED_STORE_IF489:.*]], label %[[PRED_STORE_CONTINUE490:.*]]
+; CHECK:       [[PRED_STORE_IF489]]:
+; CHECK-NEXT:    [[TMP744:%.*]] = add i64 [[INDEX]], 245
+; CHECK-NEXT:    [[TMP745:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP744]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP745]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE490]]
+; CHECK:       [[PRED_STORE_CONTINUE490]]:
+; CHECK-NEXT:    [[TMP746:%.*]] = extractelement <64 x i1> [[TMP3]], i32 54
+; CHECK-NEXT:    br i1 [[TMP746]], label %[[PRED_STORE_IF491:.*]], label %[[PRED_STORE_CONTINUE492:.*]]
+; CHECK:       [[PRED_STORE_IF491]]:
+; CHECK-NEXT:    [[TMP747:%.*]] = add i64 [[INDEX]], 246
+; CHECK-NEXT:    [[TMP748:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP747]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP748]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE492]]
+; CHECK:       [[PRED_STORE_CONTINUE492]]:
+; CHECK-NEXT:    [[TMP749:%.*]] = extractelement <64 x i1> [[TMP3]], i32 55
+; CHECK-NEXT:    br i1 [[TMP749]], label %[[PRED_STORE_IF493:.*]], label %[[PRED_STORE_CONTINUE494:.*]]
+; CHECK:       [[PRED_STORE_IF493]]:
+; CHECK-NEXT:    [[TMP750:%.*]] = add i64 [[INDEX]], 247
+; CHECK-NEXT:    [[TMP751:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP750]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP751]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE494]]
+; CHECK:       [[PRED_STORE_CONTINUE494]]:
+; CHECK-NEXT:    [[TMP752:%.*]] = extractelement <64 x i1> [[TMP3]], i32 56
+; CHECK-NEXT:    br i1 [[TMP752]], label %[[PRED_STORE_IF495:.*]], label %[[PRED_STORE_CONTINUE496:.*]]
+; CHECK:       [[PRED_STORE_IF495]]:
+; CHECK-NEXT:    [[TMP753:%.*]] = add i64 [[INDEX]], 248
+; CHECK-NEXT:    [[TMP754:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP753]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP754]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE496]]
+; CHECK:       [[PRED_STORE_CONTINUE496]]:
+; CHECK-NEXT:    [[TMP755:%.*]] = extractelement <64 x i1> [[TMP3]], i32 57
+; CHECK-NEXT:    br i1 [[TMP755]], label %[[PRED_STORE_IF497:.*]], label %[[PRED_STORE_CONTINUE498:.*]]
+; CHECK:       [[PRED_STORE_IF497]]:
+; CHECK-NEXT:    [[TMP756:%.*]] = add i64 [[INDEX]], 249
+; CHECK-NEXT:    [[TMP757:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP756]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP757]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE498]]
+; CHECK:       [[PRED_STORE_CONTINUE498]]:
+; CHECK-NEXT:    [[TMP758:%.*]] = extractelement <64 x i1> [[TMP3]], i32 58
+; CHECK-NEXT:    br i1 [[TMP758]], label %[[PRED_STORE_IF499:.*]], label %[[PRED_STORE_CONTINUE500:.*]]
+; CHECK:       [[PRED_STORE_IF499]]:
+; CHECK-NEXT:    [[TMP759:%.*]] = add i64 [[INDEX]], 250
+; CHECK-NEXT:    [[TMP760:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP759]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP760]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE500]]
+; CHECK:       [[PRED_STORE_CONTINUE500]]:
+; CHECK-NEXT:    [[TMP761:%.*]] = extractelement <64 x i1> [[TMP3]], i32 59
+; CHECK-NEXT:    br i1 [[TMP761]], label %[[PRED_STORE_IF501:.*]], label %[[PRED_STORE_CONTINUE502:.*]]
+; CHECK:       [[PRED_STORE_IF501]]:
+; CHECK-NEXT:    [[TMP762:%.*]] = add i64 [[INDEX]], 251
+; CHECK-NEXT:    [[TMP763:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP762]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP763]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE502]]
+; CHECK:       [[PRED_STORE_CONTINUE502]]:
+; CHECK-NEXT:    [[TMP764:%.*]] = extractelement <64 x i1> [[TMP3]], i32 60
+; CHECK-NEXT:    br i1 [[TMP764]], label %[[PRED_STORE_IF503:.*]], label %[[PRED_STORE_CONTINUE504:.*]]
+; CHECK:       [[PRED_STORE_IF503]]:
+; CHECK-NEXT:    [[TMP765:%.*]] = add i64 [[INDEX]], 252
+; CHECK-NEXT:    [[TMP766:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP765]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP766]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE504]]
+; CHECK:       [[PRED_STORE_CONTINUE504]]:
+; CHECK-NEXT:    [[TMP767:%.*]] = extractelement <64 x i1> [[TMP3]], i32 61
+; CHECK-NEXT:    br i1 [[TMP767]], label %[[PRED_STORE_IF505:.*]], label %[[PRED_STORE_CONTINUE506:.*]]
+; CHECK:       [[PRED_STORE_IF505]]:
+; CHECK-NEXT:    [[TMP768:%.*]] = add i64 [[INDEX]], 253
+; CHECK-NEXT:    [[TMP769:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP768]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP769]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE506]]
+; CHECK:       [[PRED_STORE_CONTINUE506]]:
+; CHECK-NEXT:    [[TMP770:%.*]] = extractelement <64 x i1> [[TMP3]], i32 62
+; CHECK-NEXT:    br i1 [[TMP770]], label %[[PRED_STORE_IF507:.*]], label %[[PRED_STORE_CONTINUE508:.*]]
+; CHECK:       [[PRED_STORE_IF507]]:
+; CHECK-NEXT:    [[TMP771:%.*]] = add i64 [[INDEX]], 254
+; CHECK-NEXT:    [[TMP772:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP771]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP772]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE508]]
+; CHECK:       [[PRED_STORE_CONTINUE508]]:
+; CHECK-NEXT:    [[TMP773:%.*]] = extractelement <64 x i1> [[TMP3]], i32 63
+; CHECK-NEXT:    br i1 [[TMP773]], label %[[PRED_STORE_IF509:.*]], label %[[PRED_STORE_CONTINUE510:.*]]
+; CHECK:       [[PRED_STORE_IF509]]:
+; CHECK-NEXT:    [[TMP774:%.*]] = add i64 [[INDEX]], 255
+; CHECK-NEXT:    [[TMP775:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP774]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP775]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE510]]
+; CHECK:       [[PRED_STORE_CONTINUE510]]:
+; CHECK-NEXT:    [[TMP776:%.*]] = extractelement <64 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP776]], label %[[PRED_STORE_IF511:.*]], label %[[PRED_STORE_CONTINUE512:.*]]
+; CHECK:       [[PRED_STORE_IF511]]:
+; CHECK-NEXT:    [[TMP777:%.*]] = add i64 [[INDEX]], 256
+; CHECK-NEXT:    [[TMP778:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP777]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP778]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE512]]
+; CHECK:       [[PRED_STORE_CONTINUE512]]:
+; CHECK-NEXT:    [[TMP779:%.*]] = extractelement <64 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP779]], label %[[PRED_STORE_IF513:.*]], label %[[PRED_STORE_CONTINUE514:.*]]
+; CHECK:       [[PRED_STORE_IF513]]:
+; CHECK-NEXT:    [[TMP780:%.*]] = add i64 [[INDEX]], 257
+; CHECK-NEXT:    [[TMP781:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP780]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP781]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE514]]
+; CHECK:       [[PRED_STORE_CONTINUE514]]:
+; CHECK-NEXT:    [[TMP782:%.*]] = extractelement <64 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP782]], label %[[PRED_STORE_IF515:.*]], label %[[PRED_STORE_CONTINUE516:.*]]
+; CHECK:       [[PRED_STORE_IF515]]:
+; CHECK-NEXT:    [[TMP783:%.*]] = add i64 [[INDEX]], 258
+; CHECK-NEXT:    [[TMP784:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP783]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP784]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE516]]
+; CHECK:       [[PRED_STORE_CONTINUE516]]:
+; CHECK-NEXT:    [[TMP785:%.*]] = extractelement <64 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP785]], label %[[PRED_STORE_IF517:.*]], label %[[PRED_STORE_CONTINUE518:.*]]
+; CHECK:       [[PRED_STORE_IF517]]:
+; CHECK-NEXT:    [[TMP786:%.*]] = add i64 [[INDEX]], 259
+; CHECK-NEXT:    [[TMP787:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP786]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP787]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE518]]
+; CHECK:       [[PRED_STORE_CONTINUE518]]:
+; CHECK-NEXT:    [[TMP788:%.*]] = extractelement <64 x i1> [[TMP4]], i32 4
+; CHECK-NEXT:    br i1 [[TMP788]], label %[[PRED_STORE_IF519:.*]], label %[[PRED_STORE_CONTINUE520:.*]]
+; CHECK:       [[PRED_STORE_IF519]]:
+; CHECK-NEXT:    [[TMP789:%.*]] = add i64 [[INDEX]], 260
+; CHECK-NEXT:    [[TMP790:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP789]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP790]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE520]]
+; CHECK:       [[PRED_STORE_CONTINUE520]]:
+; CHECK-NEXT:    [[TMP791:%.*]] = extractelement <64 x i1> [[TMP4]], i32 5
+; CHECK-NEXT:    br i1 [[TMP791]], label %[[PRED_STORE_IF521:.*]], label %[[PRED_STORE_CONTINUE522:.*]]
+; CHECK:       [[PRED_STORE_IF521]]:
+; CHECK-NEXT:    [[TMP792:%.*]] = add i64 [[INDEX]], 261
+; CHECK-NEXT:    [[TMP793:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP792]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP793]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE522]]
+; CHECK:       [[PRED_STORE_CONTINUE522]]:
+; CHECK-NEXT:    [[TMP794:%.*]] = extractelement <64 x i1> [[TMP4]], i32 6
+; CHECK-NEXT:    br i1 [[TMP794]], label %[[PRED_STORE_IF523:.*]], label %[[PRED_STORE_CONTINUE524:.*]]
+; CHECK:       [[PRED_STORE_IF523]]:
+; CHECK-NEXT:    [[TMP795:%.*]] = add i64 [[INDEX]], 262
+; CHECK-NEXT:    [[TMP796:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP795]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP796]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE524]]
+; CHECK:       [[PRED_STORE_CONTINUE524]]:
+; CHECK-NEXT:    [[TMP797:%.*]] = extractelement <64 x i1> [[TMP4]], i32 7
+; CHECK-NEXT:    br i1 [[TMP797]], label %[[PRED_STORE_IF525:.*]], label %[[PRED_STORE_CONTINUE526:.*]]
+; CHECK:       [[PRED_STORE_IF525]]:
+; CHECK-NEXT:    [[TMP798:%.*]] = add i64 [[INDEX]], 263
+; CHECK-NEXT:    [[TMP799:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP798]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP799]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE526]]
+; CHECK:       [[PRED_STORE_CONTINUE526]]:
+; CHECK-NEXT:    [[TMP800:%.*]] = extractelement <64 x i1> [[TMP4]], i32 8
+; CHECK-NEXT:    br i1 [[TMP800]], label %[[PRED_STORE_IF527:.*]], label %[[PRED_STORE_CONTINUE528:.*]]
+; CHECK:       [[PRED_STORE_IF527]]:
+; CHECK-NEXT:    [[TMP801:%.*]] = add i64 [[INDEX]], 264
+; CHECK-NEXT:    [[TMP802:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP801]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP802]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE528]]
+; CHECK:       [[PRED_STORE_CONTINUE528]]:
+; CHECK-NEXT:    [[TMP803:%.*]] = extractelement <64 x i1> [[TMP4]], i32 9
+; CHECK-NEXT:    br i1 [[TMP803]], label %[[PRED_STORE_IF529:.*]], label %[[PRED_STORE_CONTINUE530:.*]]
+; CHECK:       [[PRED_STORE_IF529]]:
+; CHECK-NEXT:    [[TMP804:%.*]] = add i64 [[INDEX]], 265
+; CHECK-NEXT:    [[TMP805:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP804]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP805]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE530]]
+; CHECK:       [[PRED_STORE_CONTINUE530]]:
+; CHECK-NEXT:    [[TMP806:%.*]] = extractelement <64 x i1> [[TMP4]], i32 10
+; CHECK-NEXT:    br i1 [[TMP806]], label %[[PRED_STORE_IF531:.*]], label %[[PRED_STORE_CONTINUE532:.*]]
+; CHECK:       [[PRED_STORE_IF531]]:
+; CHECK-NEXT:    [[TMP807:%.*]] = add i64 [[INDEX]], 266
+; CHECK-NEXT:    [[TMP808:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP807]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP808]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE532]]
+; CHECK:       [[PRED_STORE_CONTINUE532]]:
+; CHECK-NEXT:    [[TMP809:%.*]] = extractelement <64 x i1> [[TMP4]], i32 11
+; CHECK-NEXT:    br i1 [[TMP809]], label %[[PRED_STORE_IF533:.*]], label %[[PRED_STORE_CONTINUE534:.*]]
+; CHECK:       [[PRED_STORE_IF533]]:
+; CHECK-NEXT:    [[TMP810:%.*]] = add i64 [[INDEX]], 267
+; CHECK-NEXT:    [[TMP811:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP810]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP811]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE534]]
+; CHECK:       [[PRED_STORE_CONTINUE534]]:
+; CHECK-NEXT:    [[TMP812:%.*]] = extractelement <64 x i1> [[TMP4]], i32 12
+; CHECK-NEXT:    br i1 [[TMP812]], label %[[PRED_STORE_IF535:.*]], label %[[PRED_STORE_CONTINUE536:.*]]
+; CHECK:       [[PRED_STORE_IF535]]:
+; CHECK-NEXT:    [[TMP813:%.*]] = add i64 [[INDEX]], 268
+; CHECK-NEXT:    [[TMP814:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP813]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP814]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE536]]
+; CHECK:       [[PRED_STORE_CONTINUE536]]:
+; CHECK-NEXT:    [[TMP815:%.*]] = extractelement <64 x i1> [[TMP4]], i32 13
+; CHECK-NEXT:    br i1 [[TMP815]], label %[[PRED_STORE_IF537:.*]], label %[[PRED_STORE_CONTINUE538:.*]]
+; CHECK:       [[PRED_STORE_IF537]]:
+; CHECK-NEXT:    [[TMP816:%.*]] = add i64 [[INDEX]], 269
+; CHECK-NEXT:    [[TMP817:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP816]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP817]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE538]]
+; CHECK:       [[PRED_STORE_CONTINUE538]]:
+; CHECK-NEXT:    [[TMP818:%.*]] = extractelement <64 x i1> [[TMP4]], i32 14
+; CHECK-NEXT:    br i1 [[TMP818]], label %[[PRED_STORE_IF539:.*]], label %[[PRED_STORE_CONTINUE540:.*]]
+; CHECK:       [[PRED_STORE_IF539]]:
+; CHECK-NEXT:    [[TMP819:%.*]] = add i64 [[INDEX]], 270
+; CHECK-NEXT:    [[TMP820:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP819]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP820]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE540]]
+; CHECK:       [[PRED_STORE_CONTINUE540]]:
+; CHECK-NEXT:    [[TMP821:%.*]] = extractelement <64 x i1> [[TMP4]], i32 15
+; CHECK-NEXT:    br i1 [[TMP821]], label %[[PRED_STORE_IF541:.*]], label %[[PRED_STORE_CONTINUE542:.*]]
+; CHECK:       [[PRED_STORE_IF541]]:
+; CHECK-NEXT:    [[TMP822:%.*]] = add i64 [[INDEX]], 271
+; CHECK-NEXT:    [[TMP823:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP822]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP823]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE542]]
+; CHECK:       [[PRED_STORE_CONTINUE542]]:
+; CHECK-NEXT:    [[TMP824:%.*]] = extractelement <64 x i1> [[TMP4]], i32 16
+; CHECK-NEXT:    br i1 [[TMP824]], label %[[PRED_STORE_IF543:.*]], label %[[PRED_STORE_CONTINUE544:.*]]
+; CHECK:       [[PRED_STORE_IF543]]:
+; CHECK-NEXT:    [[TMP825:%.*]] = add i64 [[INDEX]], 272
+; CHECK-NEXT:    [[TMP826:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP825]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP826]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE544]]
+; CHECK:       [[PRED_STORE_CONTINUE544]]:
+; CHECK-NEXT:    [[TMP827:%.*]] = extractelement <64 x i1> [[TMP4]], i32 17
+; CHECK-NEXT:    br i1 [[TMP827]], label %[[PRED_STORE_IF545:.*]], label %[[PRED_STORE_CONTINUE546:.*]]
+; CHECK:       [[PRED_STORE_IF545]]:
+; CHECK-NEXT:    [[TMP828:%.*]] = add i64 [[INDEX]], 273
+; CHECK-NEXT:    [[TMP829:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP828]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP829]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE546]]
+; CHECK:       [[PRED_STORE_CONTINUE546]]:
+; CHECK-NEXT:    [[TMP830:%.*]] = extractelement <64 x i1> [[TMP4]], i32 18
+; CHECK-NEXT:    br i1 [[TMP830]], label %[[PRED_STORE_IF547:.*]], label %[[PRED_STORE_CONTINUE548:.*]]
+; CHECK:       [[PRED_STORE_IF547]]:
+; CHECK-NEXT:    [[TMP831:%.*]] = add i64 [[INDEX]], 274
+; CHECK-NEXT:    [[TMP832:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP831]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP832]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE548]]
+; CHECK:       [[PRED_STORE_CONTINUE548]]:
+; CHECK-NEXT:    [[TMP833:%.*]] = extractelement <64 x i1> [[TMP4]], i32 19
+; CHECK-NEXT:    br i1 [[TMP833]], label %[[PRED_STORE_IF549:.*]], label %[[PRED_STORE_CONTINUE550:.*]]
+; CHECK:       [[PRED_STORE_IF549]]:
+; CHECK-NEXT:    [[TMP834:%.*]] = add i64 [[INDEX]], 275
+; CHECK-NEXT:    [[TMP835:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP834]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP835]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE550]]
+; CHECK:       [[PRED_STORE_CONTINUE550]]:
+; CHECK-NEXT:    [[TMP836:%.*]] = extractelement <64 x i1> [[TMP4]], i32 20
+; CHECK-NEXT:    br i1 [[TMP836]], label %[[PRED_STORE_IF551:.*]], label %[[PRED_STORE_CONTINUE552:.*]]
+; CHECK:       [[PRED_STORE_IF551]]:
+; CHECK-NEXT:    [[TMP837:%.*]] = add i64 [[INDEX]], 276
+; CHECK-NEXT:    [[TMP838:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP837]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP838]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE552]]
+; CHECK:       [[PRED_STORE_CONTINUE552]]:
+; CHECK-NEXT:    [[TMP839:%.*]] = extractelement <64 x i1> [[TMP4]], i32 21
+; CHECK-NEXT:    br i1 [[TMP839]], label %[[PRED_STORE_IF553:.*]], label %[[PRED_STORE_CONTINUE554:.*]]
+; CHECK:       [[PRED_STORE_IF553]]:
+; CHECK-NEXT:    [[TMP840:%.*]] = add i64 [[INDEX]], 277
+; CHECK-NEXT:    [[TMP841:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP840]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP841]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE554]]
+; CHECK:       [[PRED_STORE_CONTINUE554]]:
+; CHECK-NEXT:    [[TMP842:%.*]] = extractelement <64 x i1> [[TMP4]], i32 22
+; CHECK-NEXT:    br i1 [[TMP842]], label %[[PRED_STORE_IF555:.*]], label %[[PRED_STORE_CONTINUE556:.*]]
+; CHECK:       [[PRED_STORE_IF555]]:
+; CHECK-NEXT:    [[TMP843:%.*]] = add i64 [[INDEX]], 278
+; CHECK-NEXT:    [[TMP844:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP843]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP844]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE556]]
+; CHECK:       [[PRED_STORE_CONTINUE556]]:
+; CHECK-NEXT:    [[TMP845:%.*]] = extractelement <64 x i1> [[TMP4]], i32 23
+; CHECK-NEXT:    br i1 [[TMP845]], label %[[PRED_STORE_IF557:.*]], label %[[PRED_STORE_CONTINUE558:.*]]
+; CHECK:       [[PRED_STORE_IF557]]:
+; CHECK-NEXT:    [[TMP846:%.*]] = add i64 [[INDEX]], 279
+; CHECK-NEXT:    [[TMP847:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP846]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP847]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE558]]
+; CHECK:       [[PRED_STORE_CONTINUE558]]:
+; CHECK-NEXT:    [[TMP848:%.*]] = extractelement <64 x i1> [[TMP4]], i32 24
+; CHECK-NEXT:    br i1 [[TMP848]], label %[[PRED_STORE_IF559:.*]], label %[[PRED_STORE_CONTINUE560:.*]]
+; CHECK:       [[PRED_STORE_IF559]]:
+; CHECK-NEXT:    [[TMP849:%.*]] = add i64 [[INDEX]], 280
+; CHECK-NEXT:    [[TMP850:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP849]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP850]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE560]]
+; CHECK:       [[PRED_STORE_CONTINUE560]]:
+; CHECK-NEXT:    [[TMP851:%.*]] = extractelement <64 x i1> [[TMP4]], i32 25
+; CHECK-NEXT:    br i1 [[TMP851]], label %[[PRED_STORE_IF561:.*]], label %[[PRED_STORE_CONTINUE562:.*]]
+; CHECK:       [[PRED_STORE_IF561]]:
+; CHECK-NEXT:    [[TMP852:%.*]] = add i64 [[INDEX]], 281
+; CHECK-NEXT:    [[TMP853:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP852]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP853]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE562]]
+; CHECK:       [[PRED_STORE_CONTINUE562]]:
+; CHECK-NEXT:    [[TMP854:%.*]] = extractelement <64 x i1> [[TMP4]], i32 26
+; CHECK-NEXT:    br i1 [[TMP854]], label %[[PRED_STORE_IF563:.*]], label %[[PRED_STORE_CONTINUE564:.*]]
+; CHECK:       [[PRED_STORE_IF563]]:
+; CHECK-NEXT:    [[TMP855:%.*]] = add i64 [[INDEX]], 282
+; CHECK-NEXT:    [[TMP856:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP855]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP856]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE564]]
+; CHECK:       [[PRED_STORE_CONTINUE564]]:
+; CHECK-NEXT:    [[TMP857:%.*]] = extractelement <64 x i1> [[TMP4]], i32 27
+; CHECK-NEXT:    br i1 [[TMP857]], label %[[PRED_STORE_IF565:.*]], label %[[PRED_STORE_CONTINUE566:.*]]
+; CHECK:       [[PRED_STORE_IF565]]:
+; CHECK-NEXT:    [[TMP858:%.*]] = add i64 [[INDEX]], 283
+; CHECK-NEXT:    [[TMP859:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP858]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP859]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE566]]
+; CHECK:       [[PRED_STORE_CONTINUE566]]:
+; CHECK-NEXT:    [[TMP860:%.*]] = extractelement <64 x i1> [[TMP4]], i32 28
+; CHECK-NEXT:    br i1 [[TMP860]], label %[[PRED_STORE_IF567:.*]], label %[[PRED_STORE_CONTINUE568:.*]]
+; CHECK:       [[PRED_STORE_IF567]]:
+; CHECK-NEXT:    [[TMP861:%.*]] = add i64 [[INDEX]], 284
+; CHECK-NEXT:    [[TMP862:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP861]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP862]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE568]]
+; CHECK:       [[PRED_STORE_CONTINUE568]]:
+; CHECK-NEXT:    [[TMP863:%.*]] = extractelement <64 x i1> [[TMP4]], i32 29
+; CHECK-NEXT:    br i1 [[TMP863]], label %[[PRED_STORE_IF569:.*]], label %[[PRED_STORE_CONTINUE570:.*]]
+; CHECK:       [[PRED_STORE_IF569]]:
+; CHECK-NEXT:    [[TMP864:%.*]] = add i64 [[INDEX]], 285
+; CHECK-NEXT:    [[TMP865:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP864]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP865]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE570]]
+; CHECK:       [[PRED_STORE_CONTINUE570]]:
+; CHECK-NEXT:    [[TMP866:%.*]] = extractelement <64 x i1> [[TMP4]], i32 30
+; CHECK-NEXT:    br i1 [[TMP866]], label %[[PRED_STORE_IF571:.*]], label %[[PRED_STORE_CONTINUE572:.*]]
+; CHECK:       [[PRED_STORE_IF571]]:
+; CHECK-NEXT:    [[TMP867:%.*]] = add i64 [[INDEX]], 286
+; CHECK-NEXT:    [[TMP868:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP867]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP868]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE572]]
+; CHECK:       [[PRED_STORE_CONTINUE572]]:
+; CHECK-NEXT:    [[TMP869:%.*]] = extractelement <64 x i1> [[TMP4]], i32 31
+; CHECK-NEXT:    br i1 [[TMP869]], label %[[PRED_STORE_IF573:.*]], label %[[PRED_STORE_CONTINUE574:.*]]
+; CHECK:       [[PRED_STORE_IF573]]:
+; CHECK-NEXT:    [[TMP870:%.*]] = add i64 [[INDEX]], 287
+; CHECK-NEXT:    [[TMP871:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP870]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP871]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE574]]
+; CHECK:       [[PRED_STORE_CONTINUE574]]:
+; CHECK-NEXT:    [[TMP872:%.*]] = extractelement <64 x i1> [[TMP4]], i32 32
+; CHECK-NEXT:    br i1 [[TMP872]], label %[[PRED_STORE_IF575:.*]], label %[[PRED_STORE_CONTINUE576:.*]]
+; CHECK:       [[PRED_STORE_IF575]]:
+; CHECK-NEXT:    [[TMP873:%.*]] = add i64 [[INDEX]], 288
+; CHECK-NEXT:    [[TMP874:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP873]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP874]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE576]]
+; CHECK:       [[PRED_STORE_CONTINUE576]]:
+; CHECK-NEXT:    [[TMP875:%.*]] = extractelement <64 x i1> [[TMP4]], i32 33
+; CHECK-NEXT:    br i1 [[TMP875]], label %[[PRED_STORE_IF577:.*]], label %[[PRED_STORE_CONTINUE578:.*]]
+; CHECK:       [[PRED_STORE_IF577]]:
+; CHECK-NEXT:    [[TMP876:%.*]] = add i64 [[INDEX]], 289
+; CHECK-NEXT:    [[TMP877:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP876]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP877]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE578]]
+; CHECK:       [[PRED_STORE_CONTINUE578]]:
+; CHECK-NEXT:    [[TMP878:%.*]] = extractelement <64 x i1> [[TMP4]], i32 34
+; CHECK-NEXT:    br i1 [[TMP878]], label %[[PRED_STORE_IF579:.*]], label %[[PRED_STORE_CONTINUE580:.*]]
+; CHECK:       [[PRED_STORE_IF579]]:
+; CHECK-NEXT:    [[TMP879:%.*]] = add i64 [[INDEX]], 290
+; CHECK-NEXT:    [[TMP880:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP879]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP880]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE580]]
+; CHECK:       [[PRED_STORE_CONTINUE580]]:
+; CHECK-NEXT:    [[TMP881:%.*]] = extractelement <64 x i1> [[TMP4]], i32 35
+; CHECK-NEXT:    br i1 [[TMP881]], label %[[PRED_STORE_IF581:.*]], label %[[PRED_STORE_CONTINUE582:.*]]
+; CHECK:       [[PRED_STORE_IF581]]:
+; CHECK-NEXT:    [[TMP882:%.*]] = add i64 [[INDEX]], 291
+; CHECK-NEXT:    [[TMP883:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP882]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP883]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE582]]
+; CHECK:       [[PRED_STORE_CONTINUE582]]:
+; CHECK-NEXT:    [[TMP884:%.*]] = extractelement <64 x i1> [[TMP4]], i32 36
+; CHECK-NEXT:    br i1 [[TMP884]], label %[[PRED_STORE_IF583:.*]], label %[[PRED_STORE_CONTINUE584:.*]]
+; CHECK:       [[PRED_STORE_IF583]]:
+; CHECK-NEXT:    [[TMP885:%.*]] = add i64 [[INDEX]], 292
+; CHECK-NEXT:    [[TMP886:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP885]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP886]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE584]]
+; CHECK:       [[PRED_STORE_CONTINUE584]]:
+; CHECK-NEXT:    [[TMP887:%.*]] = extractelement <64 x i1> [[TMP4]], i32 37
+; CHECK-NEXT:    br i1 [[TMP887]], label %[[PRED_STORE_IF585:.*]], label %[[PRED_STORE_CONTINUE586:.*]]
+; CHECK:       [[PRED_STORE_IF585]]:
+; CHECK-NEXT:    [[TMP888:%.*]] = add i64 [[INDEX]], 293
+; CHECK-NEXT:    [[TMP889:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP888]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP889]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE586]]
+; CHECK:       [[PRED_STORE_CONTINUE586]]:
+; CHECK-NEXT:    [[TMP890:%.*]] = extractelement <64 x i1> [[TMP4]], i32 38
+; CHECK-NEXT:    br i1 [[TMP890]], label %[[PRED_STORE_IF587:.*]], label %[[PRED_STORE_CONTINUE588:.*]]
+; CHECK:       [[PRED_STORE_IF587]]:
+; CHECK-NEXT:    [[TMP891:%.*]] = add i64 [[INDEX]], 294
+; CHECK-NEXT:    [[TMP892:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP891]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP892]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE588]]
+; CHECK:       [[PRED_STORE_CONTINUE588]]:
+; CHECK-NEXT:    [[TMP893:%.*]] = extractelement <64 x i1> [[TMP4]], i32 39
+; CHECK-NEXT:    br i1 [[TMP893]], label %[[PRED_STORE_IF589:.*]], label %[[PRED_STORE_CONTINUE590:.*]]
+; CHECK:       [[PRED_STORE_IF589]]:
+; CHECK-NEXT:    [[TMP894:%.*]] = add i64 [[INDEX]], 295
+; CHECK-NEXT:    [[TMP895:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP894]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP895]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE590]]
+; CHECK:       [[PRED_STORE_CONTINUE590]]:
+; CHECK-NEXT:    [[TMP896:%.*]] = extractelement <64 x i1> [[TMP4]], i32 40
+; CHECK-NEXT:    br i1 [[TMP896]], label %[[PRED_STORE_IF591:.*]], label %[[PRED_STORE_CONTINUE592:.*]]
+; CHECK:       [[PRED_STORE_IF591]]:
+; CHECK-NEXT:    [[TMP897:%.*]] = add i64 [[INDEX]], 296
+; CHECK-NEXT:    [[TMP898:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP897]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP898]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE592]]
+; CHECK:       [[PRED_STORE_CONTINUE592]]:
+; CHECK-NEXT:    [[TMP899:%.*]] = extractelement <64 x i1> [[TMP4]], i32 41
+; CHECK-NEXT:    br i1 [[TMP899]], label %[[PRED_STORE_IF593:.*]], label %[[PRED_STORE_CONTINUE594:.*]]
+; CHECK:       [[PRED_STORE_IF593]]:
+; CHECK-NEXT:    [[TMP900:%.*]] = add i64 [[INDEX]], 297
+; CHECK-NEXT:    [[TMP901:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP900]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP901]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE594]]
+; CHECK:       [[PRED_STORE_CONTINUE594]]:
+; CHECK-NEXT:    [[TMP902:%.*]] = extractelement <64 x i1> [[TMP4]], i32 42
+; CHECK-NEXT:    br i1 [[TMP902]], label %[[PRED_STORE_IF595:.*]], label %[[PRED_STORE_CONTINUE596:.*]]
+; CHECK:       [[PRED_STORE_IF595]]:
+; CHECK-NEXT:    [[TMP903:%.*]] = add i64 [[INDEX]], 298
+; CHECK-NEXT:    [[TMP904:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP903]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP904]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE596]]
+; CHECK:       [[PRED_STORE_CONTINUE596]]:
+; CHECK-NEXT:    [[TMP905:%.*]] = extractelement <64 x i1> [[TMP4]], i32 43
+; CHECK-NEXT:    br i1 [[TMP905]], label %[[PRED_STORE_IF597:.*]], label %[[PRED_STORE_CONTINUE598:.*]]
+; CHECK:       [[PRED_STORE_IF597]]:
+; CHECK-NEXT:    [[TMP906:%.*]] = add i64 [[INDEX]], 299
+; CHECK-NEXT:    [[TMP907:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP906]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP907]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE598]]
+; CHECK:       [[PRED_STORE_CONTINUE598]]:
+; CHECK-NEXT:    [[TMP908:%.*]] = extractelement <64 x i1> [[TMP4]], i32 44
+; CHECK-NEXT:    br i1 [[TMP908]], label %[[PRED_STORE_IF599:.*]], label %[[PRED_STORE_CONTINUE600:.*]]
+; CHECK:       [[PRED_STORE_IF599]]:
+; CHECK-NEXT:    [[TMP909:%.*]] = add i64 [[INDEX]], 300
+; CHECK-NEXT:    [[TMP910:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP909]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP910]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE600]]
+; CHECK:       [[PRED_STORE_CONTINUE600]]:
+; CHECK-NEXT:    [[TMP911:%.*]] = extractelement <64 x i1> [[TMP4]], i32 45
+; CHECK-NEXT:    br i1 [[TMP911]], label %[[PRED_STORE_IF601:.*]], label %[[PRED_STORE_CONTINUE602:.*]]
+; CHECK:       [[PRED_STORE_IF601]]:
+; CHECK-NEXT:    [[TMP912:%.*]] = add i64 [[INDEX]], 301
+; CHECK-NEXT:    [[TMP913:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP912]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP913]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE602]]
+; CHECK:       [[PRED_STORE_CONTINUE602]]:
+; CHECK-NEXT:    [[TMP914:%.*]] = extractelement <64 x i1> [[TMP4]], i32 46
+; CHECK-NEXT:    br i1 [[TMP914]], label %[[PRED_STORE_IF603:.*]], label %[[PRED_STORE_CONTINUE604:.*]]
+; CHECK:       [[PRED_STORE_IF603]]:
+; CHECK-NEXT:    [[TMP915:%.*]] = add i64 [[INDEX]], 302
+; CHECK-NEXT:    [[TMP916:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP915]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP916]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE604]]
+; CHECK:       [[PRED_STORE_CONTINUE604]]:
+; CHECK-NEXT:    [[TMP917:%.*]] = extractelement <64 x i1> [[TMP4]], i32 47
+; CHECK-NEXT:    br i1 [[TMP917]], label %[[PRED_STORE_IF605:.*]], label %[[PRED_STORE_CONTINUE606:.*]]
+; CHECK:       [[PRED_STORE_IF605]]:
+; CHECK-NEXT:    [[TMP918:%.*]] = add i64 [[INDEX]], 303
+; CHECK-NEXT:    [[TMP919:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP918]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP919]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE606]]
+; CHECK:       [[PRED_STORE_CONTINUE606]]:
+; CHECK-NEXT:    [[TMP920:%.*]] = extractelement <64 x i1> [[TMP4]], i32 48
+; CHECK-NEXT:    br i1 [[TMP920]], label %[[PRED_STORE_IF607:.*]], label %[[PRED_STORE_CONTINUE608:.*]]
+; CHECK:       [[PRED_STORE_IF607]]:
+; CHECK-NEXT:    [[TMP921:%.*]] = add i64 [[INDEX]], 304
+; CHECK-NEXT:    [[TMP922:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP921]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP922]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE608]]
+; CHECK:       [[PRED_STORE_CONTINUE608]]:
+; CHECK-NEXT:    [[TMP923:%.*]] = extractelement <64 x i1> [[TMP4]], i32 49
+; CHECK-NEXT:    br i1 [[TMP923]], label %[[PRED_STORE_IF609:.*]], label %[[PRED_STORE_CONTINUE610:.*]]
+; CHECK:       [[PRED_STORE_IF609]]:
+; CHECK-NEXT:    [[TMP924:%.*]] = add i64 [[INDEX]], 305
+; CHECK-NEXT:    [[TMP925:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP924]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP925]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE610]]
+; CHECK:       [[PRED_STORE_CONTINUE610]]:
+; CHECK-NEXT:    [[TMP926:%.*]] = extractelement <64 x i1> [[TMP4]], i32 50
+; CHECK-NEXT:    br i1 [[TMP926]], label %[[PRED_STORE_IF611:.*]], label %[[PRED_STORE_CONTINUE612:.*]]
+; CHECK:       [[PRED_STORE_IF611]]:
+; CHECK-NEXT:    [[TMP927:%.*]] = add i64 [[INDEX]], 306
+; CHECK-NEXT:    [[TMP928:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP927]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP928]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE612]]
+; CHECK:       [[PRED_STORE_CONTINUE612]]:
+; CHECK-NEXT:    [[TMP929:%.*]] = extractelement <64 x i1> [[TMP4]], i32 51
+; CHECK-NEXT:    br i1 [[TMP929]], label %[[PRED_STORE_IF613:.*]], label %[[PRED_STORE_CONTINUE614:.*]]
+; CHECK:       [[PRED_STORE_IF613]]:
+; CHECK-NEXT:    [[TMP930:%.*]] = add i64 [[INDEX]], 307
+; CHECK-NEXT:    [[TMP931:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP930]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP931]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE614]]
+; CHECK:       [[PRED_STORE_CONTINUE614]]:
+; CHECK-NEXT:    [[TMP932:%.*]] = extractelement <64 x i1> [[TMP4]], i32 52
+; CHECK-NEXT:    br i1 [[TMP932]], label %[[PRED_STORE_IF615:.*]], label %[[PRED_STORE_CONTINUE616:.*]]
+; CHECK:       [[PRED_STORE_IF615]]:
+; CHECK-NEXT:    [[TMP933:%.*]] = add i64 [[INDEX]], 308
+; CHECK-NEXT:    [[TMP934:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP933]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP934]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE616]]
+; CHECK:       [[PRED_STORE_CONTINUE616]]:
+; CHECK-NEXT:    [[TMP935:%.*]] = extractelement <64 x i1> [[TMP4]], i32 53
+; CHECK-NEXT:    br i1 [[TMP935]], label %[[PRED_STORE_IF617:.*]], label %[[PRED_STORE_CONTINUE618:.*]]
+; CHECK:       [[PRED_STORE_IF617]]:
+; CHECK-NEXT:    [[TMP936:%.*]] = add i64 [[INDEX]], 309
+; CHECK-NEXT:    [[TMP937:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP936]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP937]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE618]]
+; CHECK:       [[PRED_STORE_CONTINUE618]]:
+; CHECK-NEXT:    [[TMP938:%.*]] = extractelement <64 x i1> [[TMP4]], i32 54
+; CHECK-NEXT:    br i1 [[TMP938]], label %[[PRED_STORE_IF619:.*]], label %[[PRED_STORE_CONTINUE620:.*]]
+; CHECK:       [[PRED_STORE_IF619]]:
+; CHECK-NEXT:    [[TMP939:%.*]] = add i64 [[INDEX]], 310
+; CHECK-NEXT:    [[TMP940:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP939]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP940]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE620]]
+; CHECK:       [[PRED_STORE_CONTINUE620]]:
+; CHECK-NEXT:    [[TMP941:%.*]] = extractelement <64 x i1> [[TMP4]], i32 55
+; CHECK-NEXT:    br i1 [[TMP941]], label %[[PRED_STORE_IF621:.*]], label %[[PRED_STORE_CONTINUE622:.*]]
+; CHECK:       [[PRED_STORE_IF621]]:
+; CHECK-NEXT:    [[TMP942:%.*]] = add i64 [[INDEX]], 311
+; CHECK-NEXT:    [[TMP943:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP942]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP943]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE622]]
+; CHECK:       [[PRED_STORE_CONTINUE622]]:
+; CHECK-NEXT:    [[TMP944:%.*]] = extractelement <64 x i1> [[TMP4]], i32 56
+; CHECK-NEXT:    br i1 [[TMP944]], label %[[PRED_STORE_IF623:.*]], label %[[PRED_STORE_CONTINUE624:.*]]
+; CHECK:       [[PRED_STORE_IF623]]:
+; CHECK-NEXT:    [[TMP945:%.*]] = add i64 [[INDEX]], 312
+; CHECK-NEXT:    [[TMP946:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP945]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP946]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE624]]
+; CHECK:       [[PRED_STORE_CONTINUE624]]:
+; CHECK-NEXT:    [[TMP947:%.*]] = extractelement <64 x i1> [[TMP4]], i32 57
+; CHECK-NEXT:    br i1 [[TMP947]], label %[[PRED_STORE_IF625:.*]], label %[[PRED_STORE_CONTINUE626:.*]]
+; CHECK:       [[PRED_STORE_IF625]]:
+; CHECK-NEXT:    [[TMP948:%.*]] = add i64 [[INDEX]], 313
+; CHECK-NEXT:    [[TMP949:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP948]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP949]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE626]]
+; CHECK:       [[PRED_STORE_CONTINUE626]]:
+; CHECK-NEXT:    [[TMP950:%.*]] = extractelement <64 x i1> [[TMP4]], i32 58
+; CHECK-NEXT:    br i1 [[TMP950]], label %[[PRED_STORE_IF627:.*]], label %[[PRED_STORE_CONTINUE628:.*]]
+; CHECK:       [[PRED_STORE_IF627]]:
+; CHECK-NEXT:    [[TMP951:%.*]] = add i64 [[INDEX]], 314
+; CHECK-NEXT:    [[TMP952:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP951]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP952]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE628]]
+; CHECK:       [[PRED_STORE_CONTINUE628]]:
+; CHECK-NEXT:    [[TMP953:%.*]] = extractelement <64 x i1> [[TMP4]], i32 59
+; CHECK-NEXT:    br i1 [[TMP953]], label %[[PRED_STORE_IF629:.*]], label %[[PRED_STORE_CONTINUE630:.*]]
+; CHECK:       [[PRED_STORE_IF629]]:
+; CHECK-NEXT:    [[TMP954:%.*]] = add i64 [[INDEX]], 315
+; CHECK-NEXT:    [[TMP955:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP954]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP955]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE630]]
+; CHECK:       [[PRED_STORE_CONTINUE630]]:
+; CHECK-NEXT:    [[TMP956:%.*]] = extractelement <64 x i1> [[TMP4]], i32 60
+; CHECK-NEXT:    br i1 [[TMP956]], label %[[PRED_STORE_IF631:.*]], label %[[PRED_STORE_CONTINUE632:.*]]
+; CHECK:       [[PRED_STORE_IF631]]:
+; CHECK-NEXT:    [[TMP957:%.*]] = add i64 [[INDEX]], 316
+; CHECK-NEXT:    [[TMP958:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP957]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP958]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE632]]
+; CHECK:       [[PRED_STORE_CONTINUE632]]:
+; CHECK-NEXT:    [[TMP959:%.*]] = extractelement <64 x i1> [[TMP4]], i32 61
+; CHECK-NEXT:    br i1 [[TMP959]], label %[[PRED_STORE_IF633:.*]], label %[[PRED_STORE_CONTINUE634:.*]]
+; CHECK:       [[PRED_STORE_IF633]]:
+; CHECK-NEXT:    [[TMP960:%.*]] = add i64 [[INDEX]], 317
+; CHECK-NEXT:    [[TMP961:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP960]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP961]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE634]]
+; CHECK:       [[PRED_STORE_CONTINUE634]]:
+; CHECK-NEXT:    [[TMP962:%.*]] = extractelement <64 x i1> [[TMP4]], i32 62
+; CHECK-NEXT:    br i1 [[TMP962]], label %[[PRED_STORE_IF635:.*]], label %[[PRED_STORE_CONTINUE636:.*]]
+; CHECK:       [[PRED_STORE_IF635]]:
+; CHECK-NEXT:    [[TMP963:%.*]] = add i64 [[INDEX]], 318
+; CHECK-NEXT:    [[TMP964:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP963]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP964]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE636]]
+; CHECK:       [[PRED_STORE_CONTINUE636]]:
+; CHECK-NEXT:    [[TMP965:%.*]] = extractelement <64 x i1> [[TMP4]], i32 63
+; CHECK-NEXT:    br i1 [[TMP965]], label %[[PRED_STORE_IF637:.*]], label %[[PRED_STORE_CONTINUE638:.*]]
+; CHECK:       [[PRED_STORE_IF637]]:
+; CHECK-NEXT:    [[TMP966:%.*]] = add i64 [[INDEX]], 319
+; CHECK-NEXT:    [[TMP967:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP966]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP967]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE638]]
+; CHECK:       [[PRED_STORE_CONTINUE638]]:
+; CHECK-NEXT:    [[TMP968:%.*]] = extractelement <64 x i1> [[TMP5]], i32 0
+; CHECK-NEXT:    br i1 [[TMP968]], label %[[PRED_STORE_IF639:.*]], label %[[PRED_STORE_CONTINUE640:.*]]
+; CHECK:       [[PRED_STORE_IF639]]:
+; CHECK-NEXT:    [[TMP969:%.*]] = add i64 [[INDEX]], 320
+; CHECK-NEXT:    [[TMP970:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP969]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP970]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE640]]
+; CHECK:       [[PRED_STORE_CONTINUE640]]:
+; CHECK-NEXT:    [[TMP971:%.*]] = extractelement <64 x i1> [[TMP5]], i32 1
+; CHECK-NEXT:    br i1 [[TMP971]], label %[[PRED_STORE_IF641:.*]], label %[[PRED_STORE_CONTINUE642:.*]]
+; CHECK:       [[PRED_STORE_IF641]]:
+; CHECK-NEXT:    [[TMP972:%.*]] = add i64 [[INDEX]], 321
+; CHECK-NEXT:    [[TMP973:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP972]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP973]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE642]]
+; CHECK:       [[PRED_STORE_CONTINUE642]]:
+; CHECK-NEXT:    [[TMP974:%.*]] = extractelement <64 x i1> [[TMP5]], i32 2
+; CHECK-NEXT:    br i1 [[TMP974]], label %[[PRED_STORE_IF643:.*]], label %[[PRED_STORE_CONTINUE644:.*]]
+; CHECK:       [[PRED_STORE_IF643]]:
+; CHECK-NEXT:    [[TMP975:%.*]] = add i64 [[INDEX]], 322
+; CHECK-NEXT:    [[TMP976:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP975]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP976]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE644]]
+; CHECK:       [[PRED_STORE_CONTINUE644]]:
+; CHECK-NEXT:    [[TMP977:%.*]] = extractelement <64 x i1> [[TMP5]], i32 3
+; CHECK-NEXT:    br i1 [[TMP977]], label %[[PRED_STORE_IF645:.*]], label %[[PRED_STORE_CONTINUE646:.*]]
+; CHECK:       [[PRED_STORE_IF645]]:
+; CHECK-NEXT:    [[TMP978:%.*]] = add i64 [[INDEX]], 323
+; CHECK-NEXT:    [[TMP979:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP978]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP979]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE646]]
+; CHECK:       [[PRED_STORE_CONTINUE646]]:
+; CHECK-NEXT:    [[TMP980:%.*]] = extractelement <64 x i1> [[TMP5]], i32 4
+; CHECK-NEXT:    br i1 [[TMP980]], label %[[PRED_STORE_IF647:.*]], label %[[PRED_STORE_CONTINUE648:.*]]
+; CHECK:       [[PRED_STORE_IF647]]:
+; CHECK-NEXT:    [[TMP981:%.*]] = add i64 [[INDEX]], 324
+; CHECK-NEXT:    [[TMP982:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP981]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP982]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE648]]
+; CHECK:       [[PRED_STORE_CONTINUE648]]:
+; CHECK-NEXT:    [[TMP983:%.*]] = extractelement <64 x i1> [[TMP5]], i32 5
+; CHECK-NEXT:    br i1 [[TMP983]], label %[[PRED_STORE_IF649:.*]], label %[[PRED_STORE_CONTINUE650:.*]]
+; CHECK:       [[PRED_STORE_IF649]]:
+; CHECK-NEXT:    [[TMP984:%.*]] = add i64 [[INDEX]], 325
+; CHECK-NEXT:    [[TMP985:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP984]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP985]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE650]]
+; CHECK:       [[PRED_STORE_CONTINUE650]]:
+; CHECK-NEXT:    [[TMP986:%.*]] = extractelement <64 x i1> [[TMP5]], i32 6
+; CHECK-NEXT:    br i1 [[TMP986]], label %[[PRED_STORE_IF651:.*]], label %[[PRED_STORE_CONTINUE652:.*]]
+; CHECK:       [[PRED_STORE_IF651]]:
+; CHECK-NEXT:    [[TMP987:%.*]] = add i64 [[INDEX]], 326
+; CHECK-NEXT:    [[TMP988:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP987]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP988]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE652]]
+; CHECK:       [[PRED_STORE_CONTINUE652]]:
+; CHECK-NEXT:    [[TMP989:%.*]] = extractelement <64 x i1> [[TMP5]], i32 7
+; CHECK-NEXT:    br i1 [[TMP989]], label %[[PRED_STORE_IF653:.*]], label %[[PRED_STORE_CONTINUE654:.*]]
+; CHECK:       [[PRED_STORE_IF653]]:
+; CHECK-NEXT:    [[TMP990:%.*]] = add i64 [[INDEX]], 327
+; CHECK-NEXT:    [[TMP991:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP990]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP991]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE654]]
+; CHECK:       [[PRED_STORE_CONTINUE654]]:
+; CHECK-NEXT:    [[TMP992:%.*]] = extractelement <64 x i1> [[TMP5]], i32 8
+; CHECK-NEXT:    br i1 [[TMP992]], label %[[PRED_STORE_IF655:.*]], label %[[PRED_STORE_CONTINUE656:.*]]
+; CHECK:       [[PRED_STORE_IF655]]:
+; CHECK-NEXT:    [[TMP993:%.*]] = add i64 [[INDEX]], 328
+; CHECK-NEXT:    [[TMP994:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP993]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP994]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE656]]
+; CHECK:       [[PRED_STORE_CONTINUE656]]:
+; CHECK-NEXT:    [[TMP995:%.*]] = extractelement <64 x i1> [[TMP5]], i32 9
+; CHECK-NEXT:    br i1 [[TMP995]], label %[[PRED_STORE_IF657:.*]], label %[[PRED_STORE_CONTINUE658:.*]]
+; CHECK:       [[PRED_STORE_IF657]]:
+; CHECK-NEXT:    [[TMP996:%.*]] = add i64 [[INDEX]], 329
+; CHECK-NEXT:    [[TMP997:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP996]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP997]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE658]]
+; CHECK:       [[PRED_STORE_CONTINUE658]]:
+; CHECK-NEXT:    [[TMP998:%.*]] = extractelement <64 x i1> [[TMP5]], i32 10
+; CHECK-NEXT:    br i1 [[TMP998]], label %[[PRED_STORE_IF659:.*]], label %[[PRED_STORE_CONTINUE660:.*]]
+; CHECK:       [[PRED_STORE_IF659]]:
+; CHECK-NEXT:    [[TMP999:%.*]] = add i64 [[INDEX]], 330
+; CHECK-NEXT:    [[TMP1000:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP999]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1000]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE660]]
+; CHECK:       [[PRED_STORE_CONTINUE660]]:
+; CHECK-NEXT:    [[TMP1001:%.*]] = extractelement <64 x i1> [[TMP5]], i32 11
+; CHECK-NEXT:    br i1 [[TMP1001]], label %[[PRED_STORE_IF661:.*]], label %[[PRED_STORE_CONTINUE662:.*]]
+; CHECK:       [[PRED_STORE_IF661]]:
+; CHECK-NEXT:    [[TMP1002:%.*]] = add i64 [[INDEX]], 331
+; CHECK-NEXT:    [[TMP1003:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1002]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1003]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE662]]
+; CHECK:       [[PRED_STORE_CONTINUE662]]:
+; CHECK-NEXT:    [[TMP1004:%.*]] = extractelement <64 x i1> [[TMP5]], i32 12
+; CHECK-NEXT:    br i1 [[TMP1004]], label %[[PRED_STORE_IF663:.*]], label %[[PRED_STORE_CONTINUE664:.*]]
+; CHECK:       [[PRED_STORE_IF663]]:
+; CHECK-NEXT:    [[TMP1005:%.*]] = add i64 [[INDEX]], 332
+; CHECK-NEXT:    [[TMP1006:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1005]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1006]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE664]]
+; CHECK:       [[PRED_STORE_CONTINUE664]]:
+; CHECK-NEXT:    [[TMP1007:%.*]] = extractelement <64 x i1> [[TMP5]], i32 13
+; CHECK-NEXT:    br i1 [[TMP1007]], label %[[PRED_STORE_IF665:.*]], label %[[PRED_STORE_CONTINUE666:.*]]
+; CHECK:       [[PRED_STORE_IF665]]:
+; CHECK-NEXT:    [[TMP1008:%.*]] = add i64 [[INDEX]], 333
+; CHECK-NEXT:    [[TMP1009:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1008]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1009]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE666]]
+; CHECK:       [[PRED_STORE_CONTINUE666]]:
+; CHECK-NEXT:    [[TMP1010:%.*]] = extractelement <64 x i1> [[TMP5]], i32 14
+; CHECK-NEXT:    br i1 [[TMP1010]], label %[[PRED_STORE_IF667:.*]], label %[[PRED_STORE_CONTINUE668:.*]]
+; CHECK:       [[PRED_STORE_IF667]]:
+; CHECK-NEXT:    [[TMP1011:%.*]] = add i64 [[INDEX]], 334
+; CHECK-NEXT:    [[TMP1012:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1011]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1012]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE668]]
+; CHECK:       [[PRED_STORE_CONTINUE668]]:
+; CHECK-NEXT:    [[TMP1013:%.*]] = extractelement <64 x i1> [[TMP5]], i32 15
+; CHECK-NEXT:    br i1 [[TMP1013]], label %[[PRED_STORE_IF669:.*]], label %[[PRED_STORE_CONTINUE670:.*]]
+; CHECK:       [[PRED_STORE_IF669]]:
+; CHECK-NEXT:    [[TMP1014:%.*]] = add i64 [[INDEX]], 335
+; CHECK-NEXT:    [[TMP1015:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1014]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1015]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE670]]
+; CHECK:       [[PRED_STORE_CONTINUE670]]:
+; CHECK-NEXT:    [[TMP1016:%.*]] = extractelement <64 x i1> [[TMP5]], i32 16
+; CHECK-NEXT:    br i1 [[TMP1016]], label %[[PRED_STORE_IF671:.*]], label %[[PRED_STORE_CONTINUE672:.*]]
+; CHECK:       [[PRED_STORE_IF671]]:
+; CHECK-NEXT:    [[TMP1017:%.*]] = add i64 [[INDEX]], 336
+; CHECK-NEXT:    [[TMP1018:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1017]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1018]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE672]]
+; CHECK:       [[PRED_STORE_CONTINUE672]]:
+; CHECK-NEXT:    [[TMP1019:%.*]] = extractelement <64 x i1> [[TMP5]], i32 17
+; CHECK-NEXT:    br i1 [[TMP1019]], label %[[PRED_STORE_IF673:.*]], label %[[PRED_STORE_CONTINUE674:.*]]
+; CHECK:       [[PRED_STORE_IF673]]:
+; CHECK-NEXT:    [[TMP1020:%.*]] = add i64 [[INDEX]], 337
+; CHECK-NEXT:    [[TMP1021:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1020]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1021]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE674]]
+; CHECK:       [[PRED_STORE_CONTINUE674]]:
+; CHECK-NEXT:    [[TMP1022:%.*]] = extractelement <64 x i1> [[TMP5]], i32 18
+; CHECK-NEXT:    br i1 [[TMP1022]], label %[[PRED_STORE_IF675:.*]], label %[[PRED_STORE_CONTINUE676:.*]]
+; CHECK:       [[PRED_STORE_IF675]]:
+; CHECK-NEXT:    [[TMP1023:%.*]] = add i64 [[INDEX]], 338
+; CHECK-NEXT:    [[TMP1024:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1023]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1024]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE676]]
+; CHECK:       [[PRED_STORE_CONTINUE676]]:
+; CHECK-NEXT:    [[TMP1025:%.*]] = extractelement <64 x i1> [[TMP5]], i32 19
+; CHECK-NEXT:    br i1 [[TMP1025]], label %[[PRED_STORE_IF677:.*]], label %[[PRED_STORE_CONTINUE678:.*]]
+; CHECK:       [[PRED_STORE_IF677]]:
+; CHECK-NEXT:    [[TMP1026:%.*]] = add i64 [[INDEX]], 339
+; CHECK-NEXT:    [[TMP1027:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1026]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1027]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE678]]
+; CHECK:       [[PRED_STORE_CONTINUE678]]:
+; CHECK-NEXT:    [[TMP1028:%.*]] = extractelement <64 x i1> [[TMP5]], i32 20
+; CHECK-NEXT:    br i1 [[TMP1028]], label %[[PRED_STORE_IF679:.*]], label %[[PRED_STORE_CONTINUE680:.*]]
+; CHECK:       [[PRED_STORE_IF679]]:
+; CHECK-NEXT:    [[TMP1029:%.*]] = add i64 [[INDEX]], 340
+; CHECK-NEXT:    [[TMP1030:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1029]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1030]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE680]]
+; CHECK:       [[PRED_STORE_CONTINUE680]]:
+; CHECK-NEXT:    [[TMP1031:%.*]] = extractelement <64 x i1> [[TMP5]], i32 21
+; CHECK-NEXT:    br i1 [[TMP1031]], label %[[PRED_STORE_IF681:.*]], label %[[PRED_STORE_CONTINUE682:.*]]
+; CHECK:       [[PRED_STORE_IF681]]:
+; CHECK-NEXT:    [[TMP1032:%.*]] = add i64 [[INDEX]], 341
+; CHECK-NEXT:    [[TMP1033:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1032]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1033]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE682]]
+; CHECK:       [[PRED_STORE_CONTINUE682]]:
+; CHECK-NEXT:    [[TMP1034:%.*]] = extractelement <64 x i1> [[TMP5]], i32 22
+; CHECK-NEXT:    br i1 [[TMP1034]], label %[[PRED_STORE_IF683:.*]], label %[[PRED_STORE_CONTINUE684:.*]]
+; CHECK:       [[PRED_STORE_IF683]]:
+; CHECK-NEXT:    [[TMP1035:%.*]] = add i64 [[INDEX]], 342
+; CHECK-NEXT:    [[TMP1036:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1035]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1036]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE684]]
+; CHECK:       [[PRED_STORE_CONTINUE684]]:
+; CHECK-NEXT:    [[TMP1037:%.*]] = extractelement <64 x i1> [[TMP5]], i32 23
+; CHECK-NEXT:    br i1 [[TMP1037]], label %[[PRED_STORE_IF685:.*]], label %[[PRED_STORE_CONTINUE686:.*]]
+; CHECK:       [[PRED_STORE_IF685]]:
+; CHECK-NEXT:    [[TMP1038:%.*]] = add i64 [[INDEX]], 343
+; CHECK-NEXT:    [[TMP1039:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1038]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1039]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE686]]
+; CHECK:       [[PRED_STORE_CONTINUE686]]:
+; CHECK-NEXT:    [[TMP1040:%.*]] = extractelement <64 x i1> [[TMP5]], i32 24
+; CHECK-NEXT:    br i1 [[TMP1040]], label %[[PRED_STORE_IF687:.*]], label %[[PRED_STORE_CONTINUE688:.*]]
+; CHECK:       [[PRED_STORE_IF687]]:
+; CHECK-NEXT:    [[TMP1041:%.*]] = add i64 [[INDEX]], 344
+; CHECK-NEXT:    [[TMP1042:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1041]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1042]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE688]]
+; CHECK:       [[PRED_STORE_CONTINUE688]]:
+; CHECK-NEXT:    [[TMP1043:%.*]] = extractelement <64 x i1> [[TMP5]], i32 25
+; CHECK-NEXT:    br i1 [[TMP1043]], label %[[PRED_STORE_IF689:.*]], label %[[PRED_STORE_CONTINUE690:.*]]
+; CHECK:       [[PRED_STORE_IF689]]:
+; CHECK-NEXT:    [[TMP1044:%.*]] = add i64 [[INDEX]], 345
+; CHECK-NEXT:    [[TMP1045:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1044]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1045]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE690]]
+; CHECK:       [[PRED_STORE_CONTINUE690]]:
+; CHECK-NEXT:    [[TMP1046:%.*]] = extractelement <64 x i1> [[TMP5]], i32 26
+; CHECK-NEXT:    br i1 [[TMP1046]], label %[[PRED_STORE_IF691:.*]], label %[[PRED_STORE_CONTINUE692:.*]]
+; CHECK:       [[PRED_STORE_IF691]]:
+; CHECK-NEXT:    [[TMP1047:%.*]] = add i64 [[INDEX]], 346
+; CHECK-NEXT:    [[TMP1048:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1047]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1048]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE692]]
+; CHECK:       [[PRED_STORE_CONTINUE692]]:
+; CHECK-NEXT:    [[TMP1049:%.*]] = extractelement <64 x i1> [[TMP5]], i32 27
+; CHECK-NEXT:    br i1 [[TMP1049]], label %[[PRED_STORE_IF693:.*]], label %[[PRED_STORE_CONTINUE694:.*]]
+; CHECK:       [[PRED_STORE_IF693]]:
+; CHECK-NEXT:    [[TMP1050:%.*]] = add i64 [[INDEX]], 347
+; CHECK-NEXT:    [[TMP1051:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1050]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1051]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE694]]
+; CHECK:       [[PRED_STORE_CONTINUE694]]:
+; CHECK-NEXT:    [[TMP1052:%.*]] = extractelement <64 x i1> [[TMP5]], i32 28
+; CHECK-NEXT:    br i1 [[TMP1052]], label %[[PRED_STORE_IF695:.*]], label %[[PRED_STORE_CONTINUE696:.*]]
+; CHECK:       [[PRED_STORE_IF695]]:
+; CHECK-NEXT:    [[TMP1053:%.*]] = add i64 [[INDEX]], 348
+; CHECK-NEXT:    [[TMP1054:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1053]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1054]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE696]]
+; CHECK:       [[PRED_STORE_CONTINUE696]]:
+; CHECK-NEXT:    [[TMP1055:%.*]] = extractelement <64 x i1> [[TMP5]], i32 29
+; CHECK-NEXT:    br i1 [[TMP1055]], label %[[PRED_STORE_IF697:.*]], label %[[PRED_STORE_CONTINUE698:.*]]
+; CHECK:       [[PRED_STORE_IF697]]:
+; CHECK-NEXT:    [[TMP1056:%.*]] = add i64 [[INDEX]], 349
+; CHECK-NEXT:    [[TMP1057:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1056]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1057]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE698]]
+; CHECK:       [[PRED_STORE_CONTINUE698]]:
+; CHECK-NEXT:    [[TMP1058:%.*]] = extractelement <64 x i1> [[TMP5]], i32 30
+; CHECK-NEXT:    br i1 [[TMP1058]], label %[[PRED_STORE_IF699:.*]], label %[[PRED_STORE_CONTINUE700:.*]]
+; CHECK:       [[PRED_STORE_IF699]]:
+; CHECK-NEXT:    [[TMP1059:%.*]] = add i64 [[INDEX]], 350
+; CHECK-NEXT:    [[TMP1060:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1059]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1060]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE700]]
+; CHECK:       [[PRED_STORE_CONTINUE700]]:
+; CHECK-NEXT:    [[TMP1061:%.*]] = extractelement <64 x i1> [[TMP5]], i32 31
+; CHECK-NEXT:    br i1 [[TMP1061]], label %[[PRED_STORE_IF701:.*]], label %[[PRED_STORE_CONTINUE702:.*]]
+; CHECK:       [[PRED_STORE_IF701]]:
+; CHECK-NEXT:    [[TMP1062:%.*]] = add i64 [[INDEX]], 351
+; CHECK-NEXT:    [[TMP1063:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1062]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1063]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE702]]
+; CHECK:       [[PRED_STORE_CONTINUE702]]:
+; CHECK-NEXT:    [[TMP1064:%.*]] = extractelement <64 x i1> [[TMP5]], i32 32
+; CHECK-NEXT:    br i1 [[TMP1064]], label %[[PRED_STORE_IF703:.*]], label %[[PRED_STORE_CONTINUE704:.*]]
+; CHECK:       [[PRED_STORE_IF703]]:
+; CHECK-NEXT:    [[TMP1065:%.*]] = add i64 [[INDEX]], 352
+; CHECK-NEXT:    [[TMP1066:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1065]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1066]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE704]]
+; CHECK:       [[PRED_STORE_CONTINUE704]]:
+; CHECK-NEXT:    [[TMP1067:%.*]] = extractelement <64 x i1> [[TMP5]], i32 33
+; CHECK-NEXT:    br i1 [[TMP1067]], label %[[PRED_STORE_IF705:.*]], label %[[PRED_STORE_CONTINUE706:.*]]
+; CHECK:       [[PRED_STORE_IF705]]:
+; CHECK-NEXT:    [[TMP1068:%.*]] = add i64 [[INDEX]], 353
+; CHECK-NEXT:    [[TMP1069:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1068]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1069]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE706]]
+; CHECK:       [[PRED_STORE_CONTINUE706]]:
+; CHECK-NEXT:    [[TMP1070:%.*]] = extractelement <64 x i1> [[TMP5]], i32 34
+; CHECK-NEXT:    br i1 [[TMP1070]], label %[[PRED_STORE_IF707:.*]], label %[[PRED_STORE_CONTINUE708:.*]]
+; CHECK:       [[PRED_STORE_IF707]]:
+; CHECK-NEXT:    [[TMP1071:%.*]] = add i64 [[INDEX]], 354
+; CHECK-NEXT:    [[TMP1072:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1071]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1072]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE708]]
+; CHECK:       [[PRED_STORE_CONTINUE708]]:
+; CHECK-NEXT:    [[TMP1073:%.*]] = extractelement <64 x i1> [[TMP5]], i32 35
+; CHECK-NEXT:    br i1 [[TMP1073]], label %[[PRED_STORE_IF709:.*]], label %[[PRED_STORE_CONTINUE710:.*]]
+; CHECK:       [[PRED_STORE_IF709]]:
+; CHECK-NEXT:    [[TMP1074:%.*]] = add i64 [[INDEX]], 355
+; CHECK-NEXT:    [[TMP1075:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1074]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1075]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE710]]
+; CHECK:       [[PRED_STORE_CONTINUE710]]:
+; CHECK-NEXT:    [[TMP1076:%.*]] = extractelement <64 x i1> [[TMP5]], i32 36
+; CHECK-NEXT:    br i1 [[TMP1076]], label %[[PRED_STORE_IF711:.*]], label %[[PRED_STORE_CONTINUE712:.*]]
+; CHECK:       [[PRED_STORE_IF711]]:
+; CHECK-NEXT:    [[TMP1077:%.*]] = add i64 [[INDEX]], 356
+; CHECK-NEXT:    [[TMP1078:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1077]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1078]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE712]]
+; CHECK:       [[PRED_STORE_CONTINUE712]]:
+; CHECK-NEXT:    [[TMP1079:%.*]] = extractelement <64 x i1> [[TMP5]], i32 37
+; CHECK-NEXT:    br i1 [[TMP1079]], label %[[PRED_STORE_IF713:.*]], label %[[PRED_STORE_CONTINUE714:.*]]
+; CHECK:       [[PRED_STORE_IF713]]:
+; CHECK-NEXT:    [[TMP1080:%.*]] = add i64 [[INDEX]], 357
+; CHECK-NEXT:    [[TMP1081:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1080]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1081]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE714]]
+; CHECK:       [[PRED_STORE_CONTINUE714]]:
+; CHECK-NEXT:    [[TMP1082:%.*]] = extractelement <64 x i1> [[TMP5]], i32 38
+; CHECK-NEXT:    br i1 [[TMP1082]], label %[[PRED_STORE_IF715:.*]], label %[[PRED_STORE_CONTINUE716:.*]]
+; CHECK:       [[PRED_STORE_IF715]]:
+; CHECK-NEXT:    [[TMP1083:%.*]] = add i64 [[INDEX]], 358
+; CHECK-NEXT:    [[TMP1084:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1083]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1084]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE716]]
+; CHECK:       [[PRED_STORE_CONTINUE716]]:
+; CHECK-NEXT:    [[TMP1085:%.*]] = extractelement <64 x i1> [[TMP5]], i32 39
+; CHECK-NEXT:    br i1 [[TMP1085]], label %[[PRED_STORE_IF717:.*]], label %[[PRED_STORE_CONTINUE718:.*]]
+; CHECK:       [[PRED_STORE_IF717]]:
+; CHECK-NEXT:    [[TMP1086:%.*]] = add i64 [[INDEX]], 359
+; CHECK-NEXT:    [[TMP1087:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1086]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1087]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE718]]
+; CHECK:       [[PRED_STORE_CONTINUE718]]:
+; CHECK-NEXT:    [[TMP1088:%.*]] = extractelement <64 x i1> [[TMP5]], i32 40
+; CHECK-NEXT:    br i1 [[TMP1088]], label %[[PRED_STORE_IF719:.*]], label %[[PRED_STORE_CONTINUE720:.*]]
+; CHECK:       [[PRED_STORE_IF719]]:
+; CHECK-NEXT:    [[TMP1089:%.*]] = add i64 [[INDEX]], 360
+; CHECK-NEXT:    [[TMP1090:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1089]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1090]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE720]]
+; CHECK:       [[PRED_STORE_CONTINUE720]]:
+; CHECK-NEXT:    [[TMP1091:%.*]] = extractelement <64 x i1> [[TMP5]], i32 41
+; CHECK-NEXT:    br i1 [[TMP1091]], label %[[PRED_STORE_IF721:.*]], label %[[PRED_STORE_CONTINUE722:.*]]
+; CHECK:       [[PRED_STORE_IF721]]:
+; CHECK-NEXT:    [[TMP1092:%.*]] = add i64 [[INDEX]], 361
+; CHECK-NEXT:    [[TMP1093:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1092]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1093]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE722]]
+; CHECK:       [[PRED_STORE_CONTINUE722]]:
+; CHECK-NEXT:    [[TMP1094:%.*]] = extractelement <64 x i1> [[TMP5]], i32 42
+; CHECK-NEXT:    br i1 [[TMP1094]], label %[[PRED_STORE_IF723:.*]], label %[[PRED_STORE_CONTINUE724:.*]]
+; CHECK:       [[PRED_STORE_IF723]]:
+; CHECK-NEXT:    [[TMP1095:%.*]] = add i64 [[INDEX]], 362
+; CHECK-NEXT:    [[TMP1096:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1095]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1096]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE724]]
+; CHECK:       [[PRED_STORE_CONTINUE724]]:
+; CHECK-NEXT:    [[TMP1097:%.*]] = extractelement <64 x i1> [[TMP5]], i32 43
+; CHECK-NEXT:    br i1 [[TMP1097]], label %[[PRED_STORE_IF725:.*]], label %[[PRED_STORE_CONTINUE726:.*]]
+; CHECK:       [[PRED_STORE_IF725]]:
+; CHECK-NEXT:    [[TMP1098:%.*]] = add i64 [[INDEX]], 363
+; CHECK-NEXT:    [[TMP1099:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1098]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1099]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE726]]
+; CHECK:       [[PRED_STORE_CONTINUE726]]:
+; CHECK-NEXT:    [[TMP1100:%.*]] = extractelement <64 x i1> [[TMP5]], i32 44
+; CHECK-NEXT:    br i1 [[TMP1100]], label %[[PRED_STORE_IF727:.*]], label %[[PRED_STORE_CONTINUE728:.*]]
+; CHECK:       [[PRED_STORE_IF727]]:
+; CHECK-NEXT:    [[TMP1101:%.*]] = add i64 [[INDEX]], 364
+; CHECK-NEXT:    [[TMP1102:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1101]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1102]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE728]]
+; CHECK:       [[PRED_STORE_CONTINUE728]]:
+; CHECK-NEXT:    [[TMP1103:%.*]] = extractelement <64 x i1> [[TMP5]], i32 45
+; CHECK-NEXT:    br i1 [[TMP1103]], label %[[PRED_STORE_IF729:.*]], label %[[PRED_STORE_CONTINUE730:.*]]
+; CHECK:       [[PRED_STORE_IF729]]:
+; CHECK-NEXT:    [[TMP1104:%.*]] = add i64 [[INDEX]], 365
+; CHECK-NEXT:    [[TMP1105:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1104]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1105]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE730]]
+; CHECK:       [[PRED_STORE_CONTINUE730]]:
+; CHECK-NEXT:    [[TMP1106:%.*]] = extractelement <64 x i1> [[TMP5]], i32 46
+; CHECK-NEXT:    br i1 [[TMP1106]], label %[[PRED_STORE_IF731:.*]], label %[[PRED_STORE_CONTINUE732:.*]]
+; CHECK:       [[PRED_STORE_IF731]]:
+; CHECK-NEXT:    [[TMP1107:%.*]] = add i64 [[INDEX]], 366
+; CHECK-NEXT:    [[TMP1108:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1107]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1108]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE732]]
+; CHECK:       [[PRED_STORE_CONTINUE732]]:
+; CHECK-NEXT:    [[TMP1109:%.*]] = extractelement <64 x i1> [[TMP5]], i32 47
+; CHECK-NEXT:    br i1 [[TMP1109]], label %[[PRED_STORE_IF733:.*]], label %[[PRED_STORE_CONTINUE734:.*]]
+; CHECK:       [[PRED_STORE_IF733]]:
+; CHECK-NEXT:    [[TMP1110:%.*]] = add i64 [[INDEX]], 367
+; CHECK-NEXT:    [[TMP1111:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1110]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1111]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE734]]
+; CHECK:       [[PRED_STORE_CONTINUE734]]:
+; CHECK-NEXT:    [[TMP1112:%.*]] = extractelement <64 x i1> [[TMP5]], i32 48
+; CHECK-NEXT:    br i1 [[TMP1112]], label %[[PRED_STORE_IF735:.*]], label %[[PRED_STORE_CONTINUE736:.*]]
+; CHECK:       [[PRED_STORE_IF735]]:
+; CHECK-NEXT:    [[TMP1113:%.*]] = add i64 [[INDEX]], 368
+; CHECK-NEXT:    [[TMP1114:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1113]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1114]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE736]]
+; CHECK:       [[PRED_STORE_CONTINUE736]]:
+; CHECK-NEXT:    [[TMP1115:%.*]] = extractelement <64 x i1> [[TMP5]], i32 49
+; CHECK-NEXT:    br i1 [[TMP1115]], label %[[PRED_STORE_IF737:.*]], label %[[PRED_STORE_CONTINUE738:.*]]
+; CHECK:       [[PRED_STORE_IF737]]:
+; CHECK-NEXT:    [[TMP1116:%.*]] = add i64 [[INDEX]], 369
+; CHECK-NEXT:    [[TMP1117:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1116]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1117]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE738]]
+; CHECK:       [[PRED_STORE_CONTINUE738]]:
+; CHECK-NEXT:    [[TMP1118:%.*]] = extractelement <64 x i1> [[TMP5]], i32 50
+; CHECK-NEXT:    br i1 [[TMP1118]], label %[[PRED_STORE_IF739:.*]], label %[[PRED_STORE_CONTINUE740:.*]]
+; CHECK:       [[PRED_STORE_IF739]]:
+; CHECK-NEXT:    [[TMP1119:%.*]] = add i64 [[INDEX]], 370
+; CHECK-NEXT:    [[TMP1120:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1119]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1120]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE740]]
+; CHECK:       [[PRED_STORE_CONTINUE740]]:
+; CHECK-NEXT:    [[TMP1121:%.*]] = extractelement <64 x i1> [[TMP5]], i32 51
+; CHECK-NEXT:    br i1 [[TMP1121]], label %[[PRED_STORE_IF741:.*]], label %[[PRED_STORE_CONTINUE742:.*]]
+; CHECK:       [[PRED_STORE_IF741]]:
+; CHECK-NEXT:    [[TMP1122:%.*]] = add i64 [[INDEX]], 371
+; CHECK-NEXT:    [[TMP1123:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1122]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1123]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE742]]
+; CHECK:       [[PRED_STORE_CONTINUE742]]:
+; CHECK-NEXT:    [[TMP1124:%.*]] = extractelement <64 x i1> [[TMP5]], i32 52
+; CHECK-NEXT:    br i1 [[TMP1124]], label %[[PRED_STORE_IF743:.*]], label %[[PRED_STORE_CONTINUE744:.*]]
+; CHECK:       [[PRED_STORE_IF743]]:
+; CHECK-NEXT:    [[TMP1125:%.*]] = add i64 [[INDEX]], 372
+; CHECK-NEXT:    [[TMP1126:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1125]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1126]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE744]]
+; CHECK:       [[PRED_STORE_CONTINUE744]]:
+; CHECK-NEXT:    [[TMP1127:%.*]] = extractelement <64 x i1> [[TMP5]], i32 53
+; CHECK-NEXT:    br i1 [[TMP1127]], label %[[PRED_STORE_IF745:.*]], label %[[PRED_STORE_CONTINUE746:.*]]
+; CHECK:       [[PRED_STORE_IF745]]:
+; CHECK-NEXT:    [[TMP1128:%.*]] = add i64 [[INDEX]], 373
+; CHECK-NEXT:    [[TMP1129:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1128]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1129]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE746]]
+; CHECK:       [[PRED_STORE_CONTINUE746]]:
+; CHECK-NEXT:    [[TMP1130:%.*]] = extractelement <64 x i1> [[TMP5]], i32 54
+; CHECK-NEXT:    br i1 [[TMP1130]], label %[[PRED_STORE_IF747:.*]], label %[[PRED_STORE_CONTINUE748:.*]]
+; CHECK:       [[PRED_STORE_IF747]]:
+; CHECK-NEXT:    [[TMP1131:%.*]] = add i64 [[INDEX]], 374
+; CHECK-NEXT:    [[TMP1132:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1131]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1132]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE748]]
+; CHECK:       [[PRED_STORE_CONTINUE748]]:
+; CHECK-NEXT:    [[TMP1133:%.*]] = extractelement <64 x i1> [[TMP5]], i32 55
+; CHECK-NEXT:    br i1 [[TMP1133]], label %[[PRED_STORE_IF749:.*]], label %[[PRED_STORE_CONTINUE750:.*]]
+; CHECK:       [[PRED_STORE_IF749]]:
+; CHECK-NEXT:    [[TMP1134:%.*]] = add i64 [[INDEX]], 375
+; CHECK-NEXT:    [[TMP1135:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1134]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1135]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE750]]
+; CHECK:       [[PRED_STORE_CONTINUE750]]:
+; CHECK-NEXT:    [[TMP1136:%.*]] = extractelement <64 x i1> [[TMP5]], i32 56
+; CHECK-NEXT:    br i1 [[TMP1136]], label %[[PRED_STORE_IF751:.*]], label %[[PRED_STORE_CONTINUE752:.*]]
+; CHECK:       [[PRED_STORE_IF751]]:
+; CHECK-NEXT:    [[TMP1137:%.*]] = add i64 [[INDEX]], 376
+; CHECK-NEXT:    [[TMP1138:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1137]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1138]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE752]]
+; CHECK:       [[PRED_STORE_CONTINUE752]]:
+; CHECK-NEXT:    [[TMP1139:%.*]] = extractelement <64 x i1> [[TMP5]], i32 57
+; CHECK-NEXT:    br i1 [[TMP1139]], label %[[PRED_STORE_IF753:.*]], label %[[PRED_STORE_CONTINUE754:.*]]
+; CHECK:       [[PRED_STORE_IF753]]:
+; CHECK-NEXT:    [[TMP1140:%.*]] = add i64 [[INDEX]], 377
+; CHECK-NEXT:    [[TMP1141:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1140]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1141]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE754]]
+; CHECK:       [[PRED_STORE_CONTINUE754]]:
+; CHECK-NEXT:    [[TMP1142:%.*]] = extractelement <64 x i1> [[TMP5]], i32 58
+; CHECK-NEXT:    br i1 [[TMP1142]], label %[[PRED_STORE_IF755:.*]], label %[[PRED_STORE_CONTINUE756:.*]]
+; CHECK:       [[PRED_STORE_IF755]]:
+; CHECK-NEXT:    [[TMP1143:%.*]] = add i64 [[INDEX]], 378
+; CHECK-NEXT:    [[TMP1144:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1143]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1144]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE756]]
+; CHECK:       [[PRED_STORE_CONTINUE756]]:
+; CHECK-NEXT:    [[TMP1145:%.*]] = extractelement <64 x i1> [[TMP5]], i32 59
+; CHECK-NEXT:    br i1 [[TMP1145]], label %[[PRED_STORE_IF757:.*]], label %[[PRED_STORE_CONTINUE758:.*]]
+; CHECK:       [[PRED_STORE_IF757]]:
+; CHECK-NEXT:    [[TMP1146:%.*]] = add i64 [[INDEX]], 379
+; CHECK-NEXT:    [[TMP1147:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1146]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1147]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE758]]
+; CHECK:       [[PRED_STORE_CONTINUE758]]:
+; CHECK-NEXT:    [[TMP1148:%.*]] = extractelement <64 x i1> [[TMP5]], i32 60
+; CHECK-NEXT:    br i1 [[TMP1148]], label %[[PRED_STORE_IF759:.*]], label %[[PRED_STORE_CONTINUE760:.*]]
+; CHECK:       [[PRED_STORE_IF759]]:
+; CHECK-NEXT:    [[TMP1149:%.*]] = add i64 [[INDEX]], 380
+; CHECK-NEXT:    [[TMP1150:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1149]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1150]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE760]]
+; CHECK:       [[PRED_STORE_CONTINUE760]]:
+; CHECK-NEXT:    [[TMP1151:%.*]] = extractelement <64 x i1> [[TMP5]], i32 61
+; CHECK-NEXT:    br i1 [[TMP1151]], label %[[PRED_STORE_IF761:.*]], label %[[PRED_STORE_CONTINUE762:.*]]
+; CHECK:       [[PRED_STORE_IF761]]:
+; CHECK-NEXT:    [[TMP1152:%.*]] = add i64 [[INDEX]], 381
+; CHECK-NEXT:    [[TMP1153:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1152]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1153]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE762]]
+; CHECK:       [[PRED_STORE_CONTINUE762]]:
+; CHECK-NEXT:    [[TMP1154:%.*]] = extractelement <64 x i1> [[TMP5]], i32 62
+; CHECK-NEXT:    br i1 [[TMP1154]], label %[[PRED_STORE_IF763:.*]], label %[[PRED_STORE_CONTINUE764:.*]]
+; CHECK:       [[PRED_STORE_IF763]]:
+; CHECK-NEXT:    [[TMP1155:%.*]] = add i64 [[INDEX]], 382
+; CHECK-NEXT:    [[TMP1156:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1155]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1156]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE764]]
+; CHECK:       [[PRED_STORE_CONTINUE764]]:
+; CHECK-NEXT:    [[TMP1157:%.*]] = extractelement <64 x i1> [[TMP5]], i32 63
+; CHECK-NEXT:    br i1 [[TMP1157]], label %[[PRED_STORE_IF765:.*]], label %[[PRED_STORE_CONTINUE766:.*]]
+; CHECK:       [[PRED_STORE_IF765]]:
+; CHECK-NEXT:    [[TMP1158:%.*]] = add i64 [[INDEX]], 383
+; CHECK-NEXT:    [[TMP1159:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1158]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1159]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE766]]
+; CHECK:       [[PRED_STORE_CONTINUE766]]:
+; CHECK-NEXT:    [[TMP1160:%.*]] = extractelement <64 x i1> [[TMP6]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1160]], label %[[PRED_STORE_IF767:.*]], label %[[PRED_STORE_CONTINUE768:.*]]
+; CHECK:       [[PRED_STORE_IF767]]:
+; CHECK-NEXT:    [[TMP1161:%.*]] = add i64 [[INDEX]], 384
+; CHECK-NEXT:    [[TMP1162:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1161]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1162]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE768]]
+; CHECK:       [[PRED_STORE_CONTINUE768]]:
+; CHECK-NEXT:    [[TMP1163:%.*]] = extractelement <64 x i1> [[TMP6]], i32 1
+; CHECK-NEXT:    br i1 [[TMP1163]], label %[[PRED_STORE_IF769:.*]], label %[[PRED_STORE_CONTINUE770:.*]]
+; CHECK:       [[PRED_STORE_IF769]]:
+; CHECK-NEXT:    [[TMP1164:%.*]] = add i64 [[INDEX]], 385
+; CHECK-NEXT:    [[TMP1165:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1164]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1165]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE770]]
+; CHECK:       [[PRED_STORE_CONTINUE770]]:
+; CHECK-NEXT:    [[TMP1166:%.*]] = extractelement <64 x i1> [[TMP6]], i32 2
+; CHECK-NEXT:    br i1 [[TMP1166]], label %[[PRED_STORE_IF771:.*]], label %[[PRED_STORE_CONTINUE772:.*]]
+; CHECK:       [[PRED_STORE_IF771]]:
+; CHECK-NEXT:    [[TMP1167:%.*]] = add i64 [[INDEX]], 386
+; CHECK-NEXT:    [[TMP1168:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1167]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1168]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE772]]
+; CHECK:       [[PRED_STORE_CONTINUE772]]:
+; CHECK-NEXT:    [[TMP1169:%.*]] = extractelement <64 x i1> [[TMP6]], i32 3
+; CHECK-NEXT:    br i1 [[TMP1169]], label %[[PRED_STORE_IF773:.*]], label %[[PRED_STORE_CONTINUE774:.*]]
+; CHECK:       [[PRED_STORE_IF773]]:
+; CHECK-NEXT:    [[TMP1170:%.*]] = add i64 [[INDEX]], 387
+; CHECK-NEXT:    [[TMP1171:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1170]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1171]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE774]]
+; CHECK:       [[PRED_STORE_CONTINUE774]]:
+; CHECK-NEXT:    [[TMP1172:%.*]] = extractelement <64 x i1> [[TMP6]], i32 4
+; CHECK-NEXT:    br i1 [[TMP1172]], label %[[PRED_STORE_IF775:.*]], label %[[PRED_STORE_CONTINUE776:.*]]
+; CHECK:       [[PRED_STORE_IF775]]:
+; CHECK-NEXT:    [[TMP1173:%.*]] = add i64 [[INDEX]], 388
+; CHECK-NEXT:    [[TMP1174:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1173]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1174]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE776]]
+; CHECK:       [[PRED_STORE_CONTINUE776]]:
+; CHECK-NEXT:    [[TMP1175:%.*]] = extractelement <64 x i1> [[TMP6]], i32 5
+; CHECK-NEXT:    br i1 [[TMP1175]], label %[[PRED_STORE_IF777:.*]], label %[[PRED_STORE_CONTINUE778:.*]]
+; CHECK:       [[PRED_STORE_IF777]]:
+; CHECK-NEXT:    [[TMP1176:%.*]] = add i64 [[INDEX]], 389
+; CHECK-NEXT:    [[TMP1177:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1176]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1177]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE778]]
+; CHECK:       [[PRED_STORE_CONTINUE778]]:
+; CHECK-NEXT:    [[TMP1178:%.*]] = extractelement <64 x i1> [[TMP6]], i32 6
+; CHECK-NEXT:    br i1 [[TMP1178]], label %[[PRED_STORE_IF779:.*]], label %[[PRED_STORE_CONTINUE780:.*]]
+; CHECK:       [[PRED_STORE_IF779]]:
+; CHECK-NEXT:    [[TMP1179:%.*]] = add i64 [[INDEX]], 390
+; CHECK-NEXT:    [[TMP1180:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1179]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1180]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE780]]
+; CHECK:       [[PRED_STORE_CONTINUE780]]:
+; CHECK-NEXT:    [[TMP1181:%.*]] = extractelement <64 x i1> [[TMP6]], i32 7
+; CHECK-NEXT:    br i1 [[TMP1181]], label %[[PRED_STORE_IF781:.*]], label %[[PRED_STORE_CONTINUE782:.*]]
+; CHECK:       [[PRED_STORE_IF781]]:
+; CHECK-NEXT:    [[TMP1182:%.*]] = add i64 [[INDEX]], 391
+; CHECK-NEXT:    [[TMP1183:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1182]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1183]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE782]]
+; CHECK:       [[PRED_STORE_CONTINUE782]]:
+; CHECK-NEXT:    [[TMP1184:%.*]] = extractelement <64 x i1> [[TMP6]], i32 8
+; CHECK-NEXT:    br i1 [[TMP1184]], label %[[PRED_STORE_IF783:.*]], label %[[PRED_STORE_CONTINUE784:.*]]
+; CHECK:       [[PRED_STORE_IF783]]:
+; CHECK-NEXT:    [[TMP1185:%.*]] = add i64 [[INDEX]], 392
+; CHECK-NEXT:    [[TMP1186:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1185]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1186]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE784]]
+; CHECK:       [[PRED_STORE_CONTINUE784]]:
+; CHECK-NEXT:    [[TMP1187:%.*]] = extractelement <64 x i1> [[TMP6]], i32 9
+; CHECK-NEXT:    br i1 [[TMP1187]], label %[[PRED_STORE_IF785:.*]], label %[[PRED_STORE_CONTINUE786:.*]]
+; CHECK:       [[PRED_STORE_IF785]]:
+; CHECK-NEXT:    [[TMP1188:%.*]] = add i64 [[INDEX]], 393
+; CHECK-NEXT:    [[TMP1189:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1188]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1189]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE786]]
+; CHECK:       [[PRED_STORE_CONTINUE786]]:
+; CHECK-NEXT:    [[TMP1190:%.*]] = extractelement <64 x i1> [[TMP6]], i32 10
+; CHECK-NEXT:    br i1 [[TMP1190]], label %[[PRED_STORE_IF787:.*]], label %[[PRED_STORE_CONTINUE788:.*]]
+; CHECK:       [[PRED_STORE_IF787]]:
+; CHECK-NEXT:    [[TMP1191:%.*]] = add i64 [[INDEX]], 394
+; CHECK-NEXT:    [[TMP1192:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1191]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1192]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE788]]
+; CHECK:       [[PRED_STORE_CONTINUE788]]:
+; CHECK-NEXT:    [[TMP1193:%.*]] = extractelement <64 x i1> [[TMP6]], i32 11
+; CHECK-NEXT:    br i1 [[TMP1193]], label %[[PRED_STORE_IF789:.*]], label %[[PRED_STORE_CONTINUE790:.*]]
+; CHECK:       [[PRED_STORE_IF789]]:
+; CHECK-NEXT:    [[TMP1194:%.*]] = add i64 [[INDEX]], 395
+; CHECK-NEXT:    [[TMP1195:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1194]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1195]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE790]]
+; CHECK:       [[PRED_STORE_CONTINUE790]]:
+; CHECK-NEXT:    [[TMP1196:%.*]] = extractelement <64 x i1> [[TMP6]], i32 12
+; CHECK-NEXT:    br i1 [[TMP1196]], label %[[PRED_STORE_IF791:.*]], label %[[PRED_STORE_CONTINUE792:.*]]
+; CHECK:       [[PRED_STORE_IF791]]:
+; CHECK-NEXT:    [[TMP1197:%.*]] = add i64 [[INDEX]], 396
+; CHECK-NEXT:    [[TMP1198:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1197]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1198]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE792]]
+; CHECK:       [[PRED_STORE_CONTINUE792]]:
+; CHECK-NEXT:    [[TMP1199:%.*]] = extractelement <64 x i1> [[TMP6]], i32 13
+; CHECK-NEXT:    br i1 [[TMP1199]], label %[[PRED_STORE_IF793:.*]], label %[[PRED_STORE_CONTINUE794:.*]]
+; CHECK:       [[PRED_STORE_IF793]]:
+; CHECK-NEXT:    [[TMP1200:%.*]] = add i64 [[INDEX]], 397
+; CHECK-NEXT:    [[TMP1201:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1200]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1201]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE794]]
+; CHECK:       [[PRED_STORE_CONTINUE794]]:
+; CHECK-NEXT:    [[TMP1202:%.*]] = extractelement <64 x i1> [[TMP6]], i32 14
+; CHECK-NEXT:    br i1 [[TMP1202]], label %[[PRED_STORE_IF795:.*]], label %[[PRED_STORE_CONTINUE796:.*]]
+; CHECK:       [[PRED_STORE_IF795]]:
+; CHECK-NEXT:    [[TMP1203:%.*]] = add i64 [[INDEX]], 398
+; CHECK-NEXT:    [[TMP1204:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1203]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1204]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE796]]
+; CHECK:       [[PRED_STORE_CONTINUE796]]:
+; CHECK-NEXT:    [[TMP1205:%.*]] = extractelement <64 x i1> [[TMP6]], i32 15
+; CHECK-NEXT:    br i1 [[TMP1205]], label %[[PRED_STORE_IF797:.*]], label %[[PRED_STORE_CONTINUE798:.*]]
+; CHECK:       [[PRED_STORE_IF797]]:
+; CHECK-NEXT:    [[TMP1206:%.*]] = add i64 [[INDEX]], 399
+; CHECK-NEXT:    [[TMP1207:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1206]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1207]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE798]]
+; CHECK:       [[PRED_STORE_CONTINUE798]]:
+; CHECK-NEXT:    [[TMP1208:%.*]] = extractelement <64 x i1> [[TMP6]], i32 16
+; CHECK-NEXT:    br i1 [[TMP1208]], label %[[PRED_STORE_IF799:.*]], label %[[PRED_STORE_CONTINUE800:.*]]
+; CHECK:       [[PRED_STORE_IF799]]:
+; CHECK-NEXT:    [[TMP1209:%.*]] = add i64 [[INDEX]], 400
+; CHECK-NEXT:    [[TMP1210:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1209]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1210]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE800]]
+; CHECK:       [[PRED_STORE_CONTINUE800]]:
+; CHECK-NEXT:    [[TMP1211:%.*]] = extractelement <64 x i1> [[TMP6]], i32 17
+; CHECK-NEXT:    br i1 [[TMP1211]], label %[[PRED_STORE_IF801:.*]], label %[[PRED_STORE_CONTINUE802:.*]]
+; CHECK:       [[PRED_STORE_IF801]]:
+; CHECK-NEXT:    [[TMP1212:%.*]] = add i64 [[INDEX]], 401
+; CHECK-NEXT:    [[TMP1213:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1212]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1213]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE802]]
+; CHECK:       [[PRED_STORE_CONTINUE802]]:
+; CHECK-NEXT:    [[TMP1214:%.*]] = extractelement <64 x i1> [[TMP6]], i32 18
+; CHECK-NEXT:    br i1 [[TMP1214]], label %[[PRED_STORE_IF803:.*]], label %[[PRED_STORE_CONTINUE804:.*]]
+; CHECK:       [[PRED_STORE_IF803]]:
+; CHECK-NEXT:    [[TMP1215:%.*]] = add i64 [[INDEX]], 402
+; CHECK-NEXT:    [[TMP1216:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1215]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1216]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE804]]
+; CHECK:       [[PRED_STORE_CONTINUE804]]:
+; CHECK-NEXT:    [[TMP1217:%.*]] = extractelement <64 x i1> [[TMP6]], i32 19
+; CHECK-NEXT:    br i1 [[TMP1217]], label %[[PRED_STORE_IF805:.*]], label %[[PRED_STORE_CONTINUE806:.*]]
+; CHECK:       [[PRED_STORE_IF805]]:
+; CHECK-NEXT:    [[TMP1218:%.*]] = add i64 [[INDEX]], 403
+; CHECK-NEXT:    [[TMP1219:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1218]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1219]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE806]]
+; CHECK:       [[PRED_STORE_CONTINUE806]]:
+; CHECK-NEXT:    [[TMP1220:%.*]] = extractelement <64 x i1> [[TMP6]], i32 20
+; CHECK-NEXT:    br i1 [[TMP1220]], label %[[PRED_STORE_IF807:.*]], label %[[PRED_STORE_CONTINUE808:.*]]
+; CHECK:       [[PRED_STORE_IF807]]:
+; CHECK-NEXT:    [[TMP1221:%.*]] = add i64 [[INDEX]], 404
+; CHECK-NEXT:    [[TMP1222:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1221]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1222]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE808]]
+; CHECK:       [[PRED_STORE_CONTINUE808]]:
+; CHECK-NEXT:    [[TMP1223:%.*]] = extractelement <64 x i1> [[TMP6]], i32 21
+; CHECK-NEXT:    br i1 [[TMP1223]], label %[[PRED_STORE_IF809:.*]], label %[[PRED_STORE_CONTINUE810:.*]]
+; CHECK:       [[PRED_STORE_IF809]]:
+; CHECK-NEXT:    [[TMP1224:%.*]] = add i64 [[INDEX]], 405
+; CHECK-NEXT:    [[TMP1225:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1224]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1225]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE810]]
+; CHECK:       [[PRED_STORE_CONTINUE810]]:
+; CHECK-NEXT:    [[TMP1226:%.*]] = extractelement <64 x i1> [[TMP6]], i32 22
+; CHECK-NEXT:    br i1 [[TMP1226]], label %[[PRED_STORE_IF811:.*]], label %[[PRED_STORE_CONTINUE812:.*]]
+; CHECK:       [[PRED_STORE_IF811]]:
+; CHECK-NEXT:    [[TMP1227:%.*]] = add i64 [[INDEX]], 406
+; CHECK-NEXT:    [[TMP1228:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1227]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1228]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE812]]
+; CHECK:       [[PRED_STORE_CONTINUE812]]:
+; CHECK-NEXT:    [[TMP1229:%.*]] = extractelement <64 x i1> [[TMP6]], i32 23
+; CHECK-NEXT:    br i1 [[TMP1229]], label %[[PRED_STORE_IF813:.*]], label %[[PRED_STORE_CONTINUE814:.*]]
+; CHECK:       [[PRED_STORE_IF813]]:
+; CHECK-NEXT:    [[TMP1230:%.*]] = add i64 [[INDEX]], 407
+; CHECK-NEXT:    [[TMP1231:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1230]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1231]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE814]]
+; CHECK:       [[PRED_STORE_CONTINUE814]]:
+; CHECK-NEXT:    [[TMP1232:%.*]] = extractelement <64 x i1> [[TMP6]], i32 24
+; CHECK-NEXT:    br i1 [[TMP1232]], label %[[PRED_STORE_IF815:.*]], label %[[PRED_STORE_CONTINUE816:.*]]
+; CHECK:       [[PRED_STORE_IF815]]:
+; CHECK-NEXT:    [[TMP1233:%.*]] = add i64 [[INDEX]], 408
+; CHECK-NEXT:    [[TMP1234:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1233]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1234]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE816]]
+; CHECK:       [[PRED_STORE_CONTINUE816]]:
+; CHECK-NEXT:    [[TMP1235:%.*]] = extractelement <64 x i1> [[TMP6]], i32 25
+; CHECK-NEXT:    br i1 [[TMP1235]], label %[[PRED_STORE_IF817:.*]], label %[[PRED_STORE_CONTINUE818:.*]]
+; CHECK:       [[PRED_STORE_IF817]]:
+; CHECK-NEXT:    [[TMP1236:%.*]] = add i64 [[INDEX]], 409
+; CHECK-NEXT:    [[TMP1237:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1236]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1237]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE818]]
+; CHECK:       [[PRED_STORE_CONTINUE818]]:
+; CHECK-NEXT:    [[TMP1238:%.*]] = extractelement <64 x i1> [[TMP6]], i32 26
+; CHECK-NEXT:    br i1 [[TMP1238]], label %[[PRED_STORE_IF819:.*]], label %[[PRED_STORE_CONTINUE820:.*]]
+; CHECK:       [[PRED_STORE_IF819]]:
+; CHECK-NEXT:    [[TMP1239:%.*]] = add i64 [[INDEX]], 410
+; CHECK-NEXT:    [[TMP1240:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1239]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1240]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE820]]
+; CHECK:       [[PRED_STORE_CONTINUE820]]:
+; CHECK-NEXT:    [[TMP1241:%.*]] = extractelement <64 x i1> [[TMP6]], i32 27
+; CHECK-NEXT:    br i1 [[TMP1241]], label %[[PRED_STORE_IF821:.*]], label %[[PRED_STORE_CONTINUE822:.*]]
+; CHECK:       [[PRED_STORE_IF821]]:
+; CHECK-NEXT:    [[TMP1242:%.*]] = add i64 [[INDEX]], 411
+; CHECK-NEXT:    [[TMP1243:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1242]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1243]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE822]]
+; CHECK:       [[PRED_STORE_CONTINUE822]]:
+; CHECK-NEXT:    [[TMP1244:%.*]] = extractelement <64 x i1> [[TMP6]], i32 28
+; CHECK-NEXT:    br i1 [[TMP1244]], label %[[PRED_STORE_IF823:.*]], label %[[PRED_STORE_CONTINUE824:.*]]
+; CHECK:       [[PRED_STORE_IF823]]:
+; CHECK-NEXT:    [[TMP1245:%.*]] = add i64 [[INDEX]], 412
+; CHECK-NEXT:    [[TMP1246:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1245]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1246]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE824]]
+; CHECK:       [[PRED_STORE_CONTINUE824]]:
+; CHECK-NEXT:    [[TMP1247:%.*]] = extractelement <64 x i1> [[TMP6]], i32 29
+; CHECK-NEXT:    br i1 [[TMP1247]], label %[[PRED_STORE_IF825:.*]], label %[[PRED_STORE_CONTINUE826:.*]]
+; CHECK:       [[PRED_STORE_IF825]]:
+; CHECK-NEXT:    [[TMP1248:%.*]] = add i64 [[INDEX]], 413
+; CHECK-NEXT:    [[TMP1249:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1248]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1249]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE826]]
+; CHECK:       [[PRED_STORE_CONTINUE826]]:
+; CHECK-NEXT:    [[TMP1250:%.*]] = extractelement <64 x i1> [[TMP6]], i32 30
+; CHECK-NEXT:    br i1 [[TMP1250]], label %[[PRED_STORE_IF827:.*]], label %[[PRED_STORE_CONTINUE828:.*]]
+; CHECK:       [[PRED_STORE_IF827]]:
+; CHECK-NEXT:    [[TMP1251:%.*]] = add i64 [[INDEX]], 414
+; CHECK-NEXT:    [[TMP1252:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1251]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1252]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE828]]
+; CHECK:       [[PRED_STORE_CONTINUE828]]:
+; CHECK-NEXT:    [[TMP1253:%.*]] = extractelement <64 x i1> [[TMP6]], i32 31
+; CHECK-NEXT:    br i1 [[TMP1253]], label %[[PRED_STORE_IF829:.*]], label %[[PRED_STORE_CONTINUE830:.*]]
+; CHECK:       [[PRED_STORE_IF829]]:
+; CHECK-NEXT:    [[TMP1254:%.*]] = add i64 [[INDEX]], 415
+; CHECK-NEXT:    [[TMP1255:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1254]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1255]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE830]]
+; CHECK:       [[PRED_STORE_CONTINUE830]]:
+; CHECK-NEXT:    [[TMP1256:%.*]] = extractelement <64 x i1> [[TMP6]], i32 32
+; CHECK-NEXT:    br i1 [[TMP1256]], label %[[PRED_STORE_IF831:.*]], label %[[PRED_STORE_CONTINUE832:.*]]
+; CHECK:       [[PRED_STORE_IF831]]:
+; CHECK-NEXT:    [[TMP1257:%.*]] = add i64 [[INDEX]], 416
+; CHECK-NEXT:    [[TMP1258:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1257]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1258]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE832]]
+; CHECK:       [[PRED_STORE_CONTINUE832]]:
+; CHECK-NEXT:    [[TMP1259:%.*]] = extractelement <64 x i1> [[TMP6]], i32 33
+; CHECK-NEXT:    br i1 [[TMP1259]], label %[[PRED_STORE_IF833:.*]], label %[[PRED_STORE_CONTINUE834:.*]]
+; CHECK:       [[PRED_STORE_IF833]]:
+; CHECK-NEXT:    [[TMP1260:%.*]] = add i64 [[INDEX]], 417
+; CHECK-NEXT:    [[TMP1261:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1260]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1261]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE834]]
+; CHECK:       [[PRED_STORE_CONTINUE834]]:
+; CHECK-NEXT:    [[TMP1262:%.*]] = extractelement <64 x i1> [[TMP6]], i32 34
+; CHECK-NEXT:    br i1 [[TMP1262]], label %[[PRED_STORE_IF835:.*]], label %[[PRED_STORE_CONTINUE836:.*]]
+; CHECK:       [[PRED_STORE_IF835]]:
+; CHECK-NEXT:    [[TMP1263:%.*]] = add i64 [[INDEX]], 418
+; CHECK-NEXT:    [[TMP1264:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1263]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1264]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE836]]
+; CHECK:       [[PRED_STORE_CONTINUE836]]:
+; CHECK-NEXT:    [[TMP1265:%.*]] = extractelement <64 x i1> [[TMP6]], i32 35
+; CHECK-NEXT:    br i1 [[TMP1265]], label %[[PRED_STORE_IF837:.*]], label %[[PRED_STORE_CONTINUE838:.*]]
+; CHECK:       [[PRED_STORE_IF837]]:
+; CHECK-NEXT:    [[TMP1266:%.*]] = add i64 [[INDEX]], 419
+; CHECK-NEXT:    [[TMP1267:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1266]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1267]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE838]]
+; CHECK:       [[PRED_STORE_CONTINUE838]]:
+; CHECK-NEXT:    [[TMP1268:%.*]] = extractelement <64 x i1> [[TMP6]], i32 36
+; CHECK-NEXT:    br i1 [[TMP1268]], label %[[PRED_STORE_IF839:.*]], label %[[PRED_STORE_CONTINUE840:.*]]
+; CHECK:       [[PRED_STORE_IF839]]:
+; CHECK-NEXT:    [[TMP1269:%.*]] = add i64 [[INDEX]], 420
+; CHECK-NEXT:    [[TMP1270:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1269]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1270]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE840]]
+; CHECK:       [[PRED_STORE_CONTINUE840]]:
+; CHECK-NEXT:    [[TMP1271:%.*]] = extractelement <64 x i1> [[TMP6]], i32 37
+; CHECK-NEXT:    br i1 [[TMP1271]], label %[[PRED_STORE_IF841:.*]], label %[[PRED_STORE_CONTINUE842:.*]]
+; CHECK:       [[PRED_STORE_IF841]]:
+; CHECK-NEXT:    [[TMP1272:%.*]] = add i64 [[INDEX]], 421
+; CHECK-NEXT:    [[TMP1273:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1272]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1273]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE842]]
+; CHECK:       [[PRED_STORE_CONTINUE842]]:
+; CHECK-NEXT:    [[TMP1274:%.*]] = extractelement <64 x i1> [[TMP6]], i32 38
+; CHECK-NEXT:    br i1 [[TMP1274]], label %[[PRED_STORE_IF843:.*]], label %[[PRED_STORE_CONTINUE844:.*]]
+; CHECK:       [[PRED_STORE_IF843]]:
+; CHECK-NEXT:    [[TMP1275:%.*]] = add i64 [[INDEX]], 422
+; CHECK-NEXT:    [[TMP1276:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1275]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1276]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE844]]
+; CHECK:       [[PRED_STORE_CONTINUE844]]:
+; CHECK-NEXT:    [[TMP1277:%.*]] = extractelement <64 x i1> [[TMP6]], i32 39
+; CHECK-NEXT:    br i1 [[TMP1277]], label %[[PRED_STORE_IF845:.*]], label %[[PRED_STORE_CONTINUE846:.*]]
+; CHECK:       [[PRED_STORE_IF845]]:
+; CHECK-NEXT:    [[TMP1278:%.*]] = add i64 [[INDEX]], 423
+; CHECK-NEXT:    [[TMP1279:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1278]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1279]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE846]]
+; CHECK:       [[PRED_STORE_CONTINUE846]]:
+; CHECK-NEXT:    [[TMP1280:%.*]] = extractelement <64 x i1> [[TMP6]], i32 40
+; CHECK-NEXT:    br i1 [[TMP1280]], label %[[PRED_STORE_IF847:.*]], label %[[PRED_STORE_CONTINUE848:.*]]
+; CHECK:       [[PRED_STORE_IF847]]:
+; CHECK-NEXT:    [[TMP1281:%.*]] = add i64 [[INDEX]], 424
+; CHECK-NEXT:    [[TMP1282:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1281]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1282]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE848]]
+; CHECK:       [[PRED_STORE_CONTINUE848]]:
+; CHECK-NEXT:    [[TMP1283:%.*]] = extractelement <64 x i1> [[TMP6]], i32 41
+; CHECK-NEXT:    br i1 [[TMP1283]], label %[[PRED_STORE_IF849:.*]], label %[[PRED_STORE_CONTINUE850:.*]]
+; CHECK:       [[PRED_STORE_IF849]]:
+; CHECK-NEXT:    [[TMP1284:%.*]] = add i64 [[INDEX]], 425
+; CHECK-NEXT:    [[TMP1285:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1284]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1285]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE850]]
+; CHECK:       [[PRED_STORE_CONTINUE850]]:
+; CHECK-NEXT:    [[TMP1286:%.*]] = extractelement <64 x i1> [[TMP6]], i32 42
+; CHECK-NEXT:    br i1 [[TMP1286]], label %[[PRED_STORE_IF851:.*]], label %[[PRED_STORE_CONTINUE852:.*]]
+; CHECK:       [[PRED_STORE_IF851]]:
+; CHECK-NEXT:    [[TMP1287:%.*]] = add i64 [[INDEX]], 426
+; CHECK-NEXT:    [[TMP1288:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1287]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1288]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE852]]
+; CHECK:       [[PRED_STORE_CONTINUE852]]:
+; CHECK-NEXT:    [[TMP1289:%.*]] = extractelement <64 x i1> [[TMP6]], i32 43
+; CHECK-NEXT:    br i1 [[TMP1289]], label %[[PRED_STORE_IF853:.*]], label %[[PRED_STORE_CONTINUE854:.*]]
+; CHECK:       [[PRED_STORE_IF853]]:
+; CHECK-NEXT:    [[TMP1290:%.*]] = add i64 [[INDEX]], 427
+; CHECK-NEXT:    [[TMP1291:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1290]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1291]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE854]]
+; CHECK:       [[PRED_STORE_CONTINUE854]]:
+; CHECK-NEXT:    [[TMP1292:%.*]] = extractelement <64 x i1> [[TMP6]], i32 44
+; CHECK-NEXT:    br i1 [[TMP1292]], label %[[PRED_STORE_IF855:.*]], label %[[PRED_STORE_CONTINUE856:.*]]
+; CHECK:       [[PRED_STORE_IF855]]:
+; CHECK-NEXT:    [[TMP1293:%.*]] = add i64 [[INDEX]], 428
+; CHECK-NEXT:    [[TMP1294:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1293]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1294]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE856]]
+; CHECK:       [[PRED_STORE_CONTINUE856]]:
+; CHECK-NEXT:    [[TMP1295:%.*]] = extractelement <64 x i1> [[TMP6]], i32 45
+; CHECK-NEXT:    br i1 [[TMP1295]], label %[[PRED_STORE_IF857:.*]], label %[[PRED_STORE_CONTINUE858:.*]]
+; CHECK:       [[PRED_STORE_IF857]]:
+; CHECK-NEXT:    [[TMP1296:%.*]] = add i64 [[INDEX]], 429
+; CHECK-NEXT:    [[TMP1297:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1296]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1297]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE858]]
+; CHECK:       [[PRED_STORE_CONTINUE858]]:
+; CHECK-NEXT:    [[TMP1298:%.*]] = extractelement <64 x i1> [[TMP6]], i32 46
+; CHECK-NEXT:    br i1 [[TMP1298]], label %[[PRED_STORE_IF859:.*]], label %[[PRED_STORE_CONTINUE860:.*]]
+; CHECK:       [[PRED_STORE_IF859]]:
+; CHECK-NEXT:    [[TMP1299:%.*]] = add i64 [[INDEX]], 430
+; CHECK-NEXT:    [[TMP1300:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1299]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1300]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE860]]
+; CHECK:       [[PRED_STORE_CONTINUE860]]:
+; CHECK-NEXT:    [[TMP1301:%.*]] = extractelement <64 x i1> [[TMP6]], i32 47
+; CHECK-NEXT:    br i1 [[TMP1301]], label %[[PRED_STORE_IF861:.*]], label %[[PRED_STORE_CONTINUE862:.*]]
+; CHECK:       [[PRED_STORE_IF861]]:
+; CHECK-NEXT:    [[TMP1302:%.*]] = add i64 [[INDEX]], 431
+; CHECK-NEXT:    [[TMP1303:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1302]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1303]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE862]]
+; CHECK:       [[PRED_STORE_CONTINUE862]]:
+; CHECK-NEXT:    [[TMP1304:%.*]] = extractelement <64 x i1> [[TMP6]], i32 48
+; CHECK-NEXT:    br i1 [[TMP1304]], label %[[PRED_STORE_IF863:.*]], label %[[PRED_STORE_CONTINUE864:.*]]
+; CHECK:       [[PRED_STORE_IF863]]:
+; CHECK-NEXT:    [[TMP1305:%.*]] = add i64 [[INDEX]], 432
+; CHECK-NEXT:    [[TMP1306:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1305]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1306]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE864]]
+; CHECK:       [[PRED_STORE_CONTINUE864]]:
+; CHECK-NEXT:    [[TMP1307:%.*]] = extractelement <64 x i1> [[TMP6]], i32 49
+; CHECK-NEXT:    br i1 [[TMP1307]], label %[[PRED_STORE_IF865:.*]], label %[[PRED_STORE_CONTINUE866:.*]]
+; CHECK:       [[PRED_STORE_IF865]]:
+; CHECK-NEXT:    [[TMP1308:%.*]] = add i64 [[INDEX]], 433
+; CHECK-NEXT:    [[TMP1309:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1308]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1309]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE866]]
+; CHECK:       [[PRED_STORE_CONTINUE866]]:
+; CHECK-NEXT:    [[TMP1310:%.*]] = extractelement <64 x i1> [[TMP6]], i32 50
+; CHECK-NEXT:    br i1 [[TMP1310]], label %[[PRED_STORE_IF867:.*]], label %[[PRED_STORE_CONTINUE868:.*]]
+; CHECK:       [[PRED_STORE_IF867]]:
+; CHECK-NEXT:    [[TMP1311:%.*]] = add i64 [[INDEX]], 434
+; CHECK-NEXT:    [[TMP1312:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1311]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1312]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE868]]
+; CHECK:       [[PRED_STORE_CONTINUE868]]:
+; CHECK-NEXT:    [[TMP1313:%.*]] = extractelement <64 x i1> [[TMP6]], i32 51
+; CHECK-NEXT:    br i1 [[TMP1313]], label %[[PRED_STORE_IF869:.*]], label %[[PRED_STORE_CONTINUE870:.*]]
+; CHECK:       [[PRED_STORE_IF869]]:
+; CHECK-NEXT:    [[TMP1314:%.*]] = add i64 [[INDEX]], 435
+; CHECK-NEXT:    [[TMP1315:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1314]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1315]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE870]]
+; CHECK:       [[PRED_STORE_CONTINUE870]]:
+; CHECK-NEXT:    [[TMP1316:%.*]] = extractelement <64 x i1> [[TMP6]], i32 52
+; CHECK-NEXT:    br i1 [[TMP1316]], label %[[PRED_STORE_IF871:.*]], label %[[PRED_STORE_CONTINUE872:.*]]
+; CHECK:       [[PRED_STORE_IF871]]:
+; CHECK-NEXT:    [[TMP1317:%.*]] = add i64 [[INDEX]], 436
+; CHECK-NEXT:    [[TMP1318:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1317]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1318]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE872]]
+; CHECK:       [[PRED_STORE_CONTINUE872]]:
+; CHECK-NEXT:    [[TMP1319:%.*]] = extractelement <64 x i1> [[TMP6]], i32 53
+; CHECK-NEXT:    br i1 [[TMP1319]], label %[[PRED_STORE_IF873:.*]], label %[[PRED_STORE_CONTINUE874:.*]]
+; CHECK:       [[PRED_STORE_IF873]]:
+; CHECK-NEXT:    [[TMP1320:%.*]] = add i64 [[INDEX]], 437
+; CHECK-NEXT:    [[TMP1321:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1320]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1321]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE874]]
+; CHECK:       [[PRED_STORE_CONTINUE874]]:
+; CHECK-NEXT:    [[TMP1322:%.*]] = extractelement <64 x i1> [[TMP6]], i32 54
+; CHECK-NEXT:    br i1 [[TMP1322]], label %[[PRED_STORE_IF875:.*]], label %[[PRED_STORE_CONTINUE876:.*]]
+; CHECK:       [[PRED_STORE_IF875]]:
+; CHECK-NEXT:    [[TMP1323:%.*]] = add i64 [[INDEX]], 438
+; CHECK-NEXT:    [[TMP1324:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1323]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1324]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE876]]
+; CHECK:       [[PRED_STORE_CONTINUE876]]:
+; CHECK-NEXT:    [[TMP1325:%.*]] = extractelement <64 x i1> [[TMP6]], i32 55
+; CHECK-NEXT:    br i1 [[TMP1325]], label %[[PRED_STORE_IF877:.*]], label %[[PRED_STORE_CONTINUE878:.*]]
+; CHECK:       [[PRED_STORE_IF877]]:
+; CHECK-NEXT:    [[TMP1326:%.*]] = add i64 [[INDEX]], 439
+; CHECK-NEXT:    [[TMP1327:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1326]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1327]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE878]]
+; CHECK:       [[PRED_STORE_CONTINUE878]]:
+; CHECK-NEXT:    [[TMP1328:%.*]] = extractelement <64 x i1> [[TMP6]], i32 56
+; CHECK-NEXT:    br i1 [[TMP1328]], label %[[PRED_STORE_IF879:.*]], label %[[PRED_STORE_CONTINUE880:.*]]
+; CHECK:       [[PRED_STORE_IF879]]:
+; CHECK-NEXT:    [[TMP1329:%.*]] = add i64 [[INDEX]], 440
+; CHECK-NEXT:    [[TMP1330:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1329]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1330]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE880]]
+; CHECK:       [[PRED_STORE_CONTINUE880]]:
+; CHECK-NEXT:    [[TMP1331:%.*]] = extractelement <64 x i1> [[TMP6]], i32 57
+; CHECK-NEXT:    br i1 [[TMP1331]], label %[[PRED_STORE_IF881:.*]], label %[[PRED_STORE_CONTINUE882:.*]]
+; CHECK:       [[PRED_STORE_IF881]]:
+; CHECK-NEXT:    [[TMP1332:%.*]] = add i64 [[INDEX]], 441
+; CHECK-NEXT:    [[TMP1333:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1332]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1333]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE882]]
+; CHECK:       [[PRED_STORE_CONTINUE882]]:
+; CHECK-NEXT:    [[TMP1334:%.*]] = extractelement <64 x i1> [[TMP6]], i32 58
+; CHECK-NEXT:    br i1 [[TMP1334]], label %[[PRED_STORE_IF883:.*]], label %[[PRED_STORE_CONTINUE884:.*]]
+; CHECK:       [[PRED_STORE_IF883]]:
+; CHECK-NEXT:    [[TMP1335:%.*]] = add i64 [[INDEX]], 442
+; CHECK-NEXT:    [[TMP1336:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1335]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1336]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE884]]
+; CHECK:       [[PRED_STORE_CONTINUE884]]:
+; CHECK-NEXT:    [[TMP1337:%.*]] = extractelement <64 x i1> [[TMP6]], i32 59
+; CHECK-NEXT:    br i1 [[TMP1337]], label %[[PRED_STORE_IF885:.*]], label %[[PRED_STORE_CONTINUE886:.*]]
+; CHECK:       [[PRED_STORE_IF885]]:
+; CHECK-NEXT:    [[TMP1338:%.*]] = add i64 [[INDEX]], 443
+; CHECK-NEXT:    [[TMP1339:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1338]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1339]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE886]]
+; CHECK:       [[PRED_STORE_CONTINUE886]]:
+; CHECK-NEXT:    [[TMP1340:%.*]] = extractelement <64 x i1> [[TMP6]], i32 60
+; CHECK-NEXT:    br i1 [[TMP1340]], label %[[PRED_STORE_IF887:.*]], label %[[PRED_STORE_CONTINUE888:.*]]
+; CHECK:       [[PRED_STORE_IF887]]:
+; CHECK-NEXT:    [[TMP1341:%.*]] = add i64 [[INDEX]], 444
+; CHECK-NEXT:    [[TMP1342:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1341]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1342]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE888]]
+; CHECK:       [[PRED_STORE_CONTINUE888]]:
+; CHECK-NEXT:    [[TMP1343:%.*]] = extractelement <64 x i1> [[TMP6]], i32 61
+; CHECK-NEXT:    br i1 [[TMP1343]], label %[[PRED_STORE_IF889:.*]], label %[[PRED_STORE_CONTINUE890:.*]]
+; CHECK:       [[PRED_STORE_IF889]]:
+; CHECK-NEXT:    [[TMP1344:%.*]] = add i64 [[INDEX]], 445
+; CHECK-NEXT:    [[TMP1345:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1344]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1345]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE890]]
+; CHECK:       [[PRED_STORE_CONTINUE890]]:
+; CHECK-NEXT:    [[TMP1346:%.*]] = extractelement <64 x i1> [[TMP6]], i32 62
+; CHECK-NEXT:    br i1 [[TMP1346]], label %[[PRED_STORE_IF891:.*]], label %[[PRED_STORE_CONTINUE892:.*]]
+; CHECK:       [[PRED_STORE_IF891]]:
+; CHECK-NEXT:    [[TMP1347:%.*]] = add i64 [[INDEX]], 446
+; CHECK-NEXT:    [[TMP1348:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1347]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1348]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE892]]
+; CHECK:       [[PRED_STORE_CONTINUE892]]:
+; CHECK-NEXT:    [[TMP1349:%.*]] = extractelement <64 x i1> [[TMP6]], i32 63
+; CHECK-NEXT:    br i1 [[TMP1349]], label %[[PRED_STORE_IF893:.*]], label %[[PRED_STORE_CONTINUE894:.*]]
+; CHECK:       [[PRED_STORE_IF893]]:
+; CHECK-NEXT:    [[TMP1350:%.*]] = add i64 [[INDEX]], 447
+; CHECK-NEXT:    [[TMP1351:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1350]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1351]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE894]]
+; CHECK:       [[PRED_STORE_CONTINUE894]]:
+; CHECK-NEXT:    [[TMP1352:%.*]] = extractelement <64 x i1> [[TMP7]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1352]], label %[[PRED_STORE_IF895:.*]], label %[[PRED_STORE_CONTINUE896:.*]]
+; CHECK:       [[PRED_STORE_IF895]]:
+; CHECK-NEXT:    [[TMP1353:%.*]] = add i64 [[INDEX]], 448
+; CHECK-NEXT:    [[TMP1354:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1353]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1354]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE896]]
+; CHECK:       [[PRED_STORE_CONTINUE896]]:
+; CHECK-NEXT:    [[TMP1355:%.*]] = extractelement <64 x i1> [[TMP7]], i32 1
+; CHECK-NEXT:    br i1 [[TMP1355]], label %[[PRED_STORE_IF897:.*]], label %[[PRED_STORE_CONTINUE898:.*]]
+; CHECK:       [[PRED_STORE_IF897]]:
+; CHECK-NEXT:    [[TMP1356:%.*]] = add i64 [[INDEX]], 449
+; CHECK-NEXT:    [[TMP1357:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1356]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1357]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE898]]
+; CHECK:       [[PRED_STORE_CONTINUE898]]:
+; CHECK-NEXT:    [[TMP1358:%.*]] = extractelement <64 x i1> [[TMP7]], i32 2
+; CHECK-NEXT:    br i1 [[TMP1358]], label %[[PRED_STORE_IF899:.*]], label %[[PRED_STORE_CONTINUE900:.*]]
+; CHECK:       [[PRED_STORE_IF899]]:
+; CHECK-NEXT:    [[TMP1359:%.*]] = add i64 [[INDEX]], 450
+; CHECK-NEXT:    [[TMP1360:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1359]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1360]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE900]]
+; CHECK:       [[PRED_STORE_CONTINUE900]]:
+; CHECK-NEXT:    [[TMP1361:%.*]] = extractelement <64 x i1> [[TMP7]], i32 3
+; CHECK-NEXT:    br i1 [[TMP1361]], label %[[PRED_STORE_IF901:.*]], label %[[PRED_STORE_CONTINUE902:.*]]
+; CHECK:       [[PRED_STORE_IF901]]:
+; CHECK-NEXT:    [[TMP1362:%.*]] = add i64 [[INDEX]], 451
+; CHECK-NEXT:    [[TMP1363:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1362]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1363]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE902]]
+; CHECK:       [[PRED_STORE_CONTINUE902]]:
+; CHECK-NEXT:    [[TMP1364:%.*]] = extractelement <64 x i1> [[TMP7]], i32 4
+; CHECK-NEXT:    br i1 [[TMP1364]], label %[[PRED_STORE_IF903:.*]], label %[[PRED_STORE_CONTINUE904:.*]]
+; CHECK:       [[PRED_STORE_IF903]]:
+; CHECK-NEXT:    [[TMP1365:%.*]] = add i64 [[INDEX]], 452
+; CHECK-NEXT:    [[TMP1366:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1365]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1366]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE904]]
+; CHECK:       [[PRED_STORE_CONTINUE904]]:
+; CHECK-NEXT:    [[TMP1367:%.*]] = extractelement <64 x i1> [[TMP7]], i32 5
+; CHECK-NEXT:    br i1 [[TMP1367]], label %[[PRED_STORE_IF905:.*]], label %[[PRED_STORE_CONTINUE906:.*]]
+; CHECK:       [[PRED_STORE_IF905]]:
+; CHECK-NEXT:    [[TMP1368:%.*]] = add i64 [[INDEX]], 453
+; CHECK-NEXT:    [[TMP1369:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1368]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1369]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE906]]
+; CHECK:       [[PRED_STORE_CONTINUE906]]:
+; CHECK-NEXT:    [[TMP1370:%.*]] = extractelement <64 x i1> [[TMP7]], i32 6
+; CHECK-NEXT:    br i1 [[TMP1370]], label %[[PRED_STORE_IF907:.*]], label %[[PRED_STORE_CONTINUE908:.*]]
+; CHECK:       [[PRED_STORE_IF907]]:
+; CHECK-NEXT:    [[TMP1371:%.*]] = add i64 [[INDEX]], 454
+; CHECK-NEXT:    [[TMP1372:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1371]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1372]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE908]]
+; CHECK:       [[PRED_STORE_CONTINUE908]]:
+; CHECK-NEXT:    [[TMP1373:%.*]] = extractelement <64 x i1> [[TMP7]], i32 7
+; CHECK-NEXT:    br i1 [[TMP1373]], label %[[PRED_STORE_IF909:.*]], label %[[PRED_STORE_CONTINUE910:.*]]
+; CHECK:       [[PRED_STORE_IF909]]:
+; CHECK-NEXT:    [[TMP1374:%.*]] = add i64 [[INDEX]], 455
+; CHECK-NEXT:    [[TMP1375:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1374]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1375]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE910]]
+; CHECK:       [[PRED_STORE_CONTINUE910]]:
+; CHECK-NEXT:    [[TMP1376:%.*]] = extractelement <64 x i1> [[TMP7]], i32 8
+; CHECK-NEXT:    br i1 [[TMP1376]], label %[[PRED_STORE_IF911:.*]], label %[[PRED_STORE_CONTINUE912:.*]]
+; CHECK:       [[PRED_STORE_IF911]]:
+; CHECK-NEXT:    [[TMP1377:%.*]] = add i64 [[INDEX]], 456
+; CHECK-NEXT:    [[TMP1378:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1377]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1378]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE912]]
+; CHECK:       [[PRED_STORE_CONTINUE912]]:
+; CHECK-NEXT:    [[TMP1379:%.*]] = extractelement <64 x i1> [[TMP7]], i32 9
+; CHECK-NEXT:    br i1 [[TMP1379]], label %[[PRED_STORE_IF913:.*]], label %[[PRED_STORE_CONTINUE914:.*]]
+; CHECK:       [[PRED_STORE_IF913]]:
+; CHECK-NEXT:    [[TMP1380:%.*]] = add i64 [[INDEX]], 457
+; CHECK-NEXT:    [[TMP1381:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1380]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1381]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE914]]
+; CHECK:       [[PRED_STORE_CONTINUE914]]:
+; CHECK-NEXT:    [[TMP1382:%.*]] = extractelement <64 x i1> [[TMP7]], i32 10
+; CHECK-NEXT:    br i1 [[TMP1382]], label %[[PRED_STORE_IF915:.*]], label %[[PRED_STORE_CONTINUE916:.*]]
+; CHECK:       [[PRED_STORE_IF915]]:
+; CHECK-NEXT:    [[TMP1383:%.*]] = add i64 [[INDEX]], 458
+; CHECK-NEXT:    [[TMP1384:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1383]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1384]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE916]]
+; CHECK:       [[PRED_STORE_CONTINUE916]]:
+; CHECK-NEXT:    [[TMP1385:%.*]] = extractelement <64 x i1> [[TMP7]], i32 11
+; CHECK-NEXT:    br i1 [[TMP1385]], label %[[PRED_STORE_IF917:.*]], label %[[PRED_STORE_CONTINUE918:.*]]
+; CHECK:       [[PRED_STORE_IF917]]:
+; CHECK-NEXT:    [[TMP1386:%.*]] = add i64 [[INDEX]], 459
+; CHECK-NEXT:    [[TMP1387:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1386]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1387]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE918]]
+; CHECK:       [[PRED_STORE_CONTINUE918]]:
+; CHECK-NEXT:    [[TMP1388:%.*]] = extractelement <64 x i1> [[TMP7]], i32 12
+; CHECK-NEXT:    br i1 [[TMP1388]], label %[[PRED_STORE_IF919:.*]], label %[[PRED_STORE_CONTINUE920:.*]]
+; CHECK:       [[PRED_STORE_IF919]]:
+; CHECK-NEXT:    [[TMP1389:%.*]] = add i64 [[INDEX]], 460
+; CHECK-NEXT:    [[TMP1390:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1389]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1390]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE920]]
+; CHECK:       [[PRED_STORE_CONTINUE920]]:
+; CHECK-NEXT:    [[TMP1391:%.*]] = extractelement <64 x i1> [[TMP7]], i32 13
+; CHECK-NEXT:    br i1 [[TMP1391]], label %[[PRED_STORE_IF921:.*]], label %[[PRED_STORE_CONTINUE922:.*]]
+; CHECK:       [[PRED_STORE_IF921]]:
+; CHECK-NEXT:    [[TMP1392:%.*]] = add i64 [[INDEX]], 461
+; CHECK-NEXT:    [[TMP1393:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1392]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1393]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE922]]
+; CHECK:       [[PRED_STORE_CONTINUE922]]:
+; CHECK-NEXT:    [[TMP1394:%.*]] = extractelement <64 x i1> [[TMP7]], i32 14
+; CHECK-NEXT:    br i1 [[TMP1394]], label %[[PRED_STORE_IF923:.*]], label %[[PRED_STORE_CONTINUE924:.*]]
+; CHECK:       [[PRED_STORE_IF923]]:
+; CHECK-NEXT:    [[TMP1395:%.*]] = add i64 [[INDEX]], 462
+; CHECK-NEXT:    [[TMP1396:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1395]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1396]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE924]]
+; CHECK:       [[PRED_STORE_CONTINUE924]]:
+; CHECK-NEXT:    [[TMP1397:%.*]] = extractelement <64 x i1> [[TMP7]], i32 15
+; CHECK-NEXT:    br i1 [[TMP1397]], label %[[PRED_STORE_IF925:.*]], label %[[PRED_STORE_CONTINUE926:.*]]
+; CHECK:       [[PRED_STORE_IF925]]:
+; CHECK-NEXT:    [[TMP1398:%.*]] = add i64 [[INDEX]], 463
+; CHECK-NEXT:    [[TMP1399:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1398]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1399]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE926]]
+; CHECK:       [[PRED_STORE_CONTINUE926]]:
+; CHECK-NEXT:    [[TMP1400:%.*]] = extractelement <64 x i1> [[TMP7]], i32 16
+; CHECK-NEXT:    br i1 [[TMP1400]], label %[[PRED_STORE_IF927:.*]], label %[[PRED_STORE_CONTINUE928:.*]]
+; CHECK:       [[PRED_STORE_IF927]]:
+; CHECK-NEXT:    [[TMP1401:%.*]] = add i64 [[INDEX]], 464
+; CHECK-NEXT:    [[TMP1402:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1401]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1402]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE928]]
+; CHECK:       [[PRED_STORE_CONTINUE928]]:
+; CHECK-NEXT:    [[TMP1403:%.*]] = extractelement <64 x i1> [[TMP7]], i32 17
+; CHECK-NEXT:    br i1 [[TMP1403]], label %[[PRED_STORE_IF929:.*]], label %[[PRED_STORE_CONTINUE930:.*]]
+; CHECK:       [[PRED_STORE_IF929]]:
+; CHECK-NEXT:    [[TMP1404:%.*]] = add i64 [[INDEX]], 465
+; CHECK-NEXT:    [[TMP1405:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1404]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1405]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE930]]
+; CHECK:       [[PRED_STORE_CONTINUE930]]:
+; CHECK-NEXT:    [[TMP1406:%.*]] = extractelement <64 x i1> [[TMP7]], i32 18
+; CHECK-NEXT:    br i1 [[TMP1406]], label %[[PRED_STORE_IF931:.*]], label %[[PRED_STORE_CONTINUE932:.*]]
+; CHECK:       [[PRED_STORE_IF931]]:
+; CHECK-NEXT:    [[TMP1407:%.*]] = add i64 [[INDEX]], 466
+; CHECK-NEXT:    [[TMP1408:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1407]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1408]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE932]]
+; CHECK:       [[PRED_STORE_CONTINUE932]]:
+; CHECK-NEXT:    [[TMP1409:%.*]] = extractelement <64 x i1> [[TMP7]], i32 19
+; CHECK-NEXT:    br i1 [[TMP1409]], label %[[PRED_STORE_IF933:.*]], label %[[PRED_STORE_CONTINUE934:.*]]
+; CHECK:       [[PRED_STORE_IF933]]:
+; CHECK-NEXT:    [[TMP1410:%.*]] = add i64 [[INDEX]], 467
+; CHECK-NEXT:    [[TMP1411:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1410]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1411]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE934]]
+; CHECK:       [[PRED_STORE_CONTINUE934]]:
+; CHECK-NEXT:    [[TMP1412:%.*]] = extractelement <64 x i1> [[TMP7]], i32 20
+; CHECK-NEXT:    br i1 [[TMP1412]], label %[[PRED_STORE_IF935:.*]], label %[[PRED_STORE_CONTINUE936:.*]]
+; CHECK:       [[PRED_STORE_IF935]]:
+; CHECK-NEXT:    [[TMP1413:%.*]] = add i64 [[INDEX]], 468
+; CHECK-NEXT:    [[TMP1414:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1413]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1414]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE936]]
+; CHECK:       [[PRED_STORE_CONTINUE936]]:
+; CHECK-NEXT:    [[TMP1415:%.*]] = extractelement <64 x i1> [[TMP7]], i32 21
+; CHECK-NEXT:    br i1 [[TMP1415]], label %[[PRED_STORE_IF937:.*]], label %[[PRED_STORE_CONTINUE938:.*]]
+; CHECK:       [[PRED_STORE_IF937]]:
+; CHECK-NEXT:    [[TMP1416:%.*]] = add i64 [[INDEX]], 469
+; CHECK-NEXT:    [[TMP1417:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1416]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1417]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE938]]
+; CHECK:       [[PRED_STORE_CONTINUE938]]:
+; CHECK-NEXT:    [[TMP1418:%.*]] = extractelement <64 x i1> [[TMP7]], i32 22
+; CHECK-NEXT:    br i1 [[TMP1418]], label %[[PRED_STORE_IF939:.*]], label %[[PRED_STORE_CONTINUE940:.*]]
+; CHECK:       [[PRED_STORE_IF939]]:
+; CHECK-NEXT:    [[TMP1419:%.*]] = add i64 [[INDEX]], 470
+; CHECK-NEXT:    [[TMP1420:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1419]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1420]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE940]]
+; CHECK:       [[PRED_STORE_CONTINUE940]]:
+; CHECK-NEXT:    [[TMP1421:%.*]] = extractelement <64 x i1> [[TMP7]], i32 23
+; CHECK-NEXT:    br i1 [[TMP1421]], label %[[PRED_STORE_IF941:.*]], label %[[PRED_STORE_CONTINUE942:.*]]
+; CHECK:       [[PRED_STORE_IF941]]:
+; CHECK-NEXT:    [[TMP1422:%.*]] = add i64 [[INDEX]], 471
+; CHECK-NEXT:    [[TMP1423:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1422]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1423]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE942]]
+; CHECK:       [[PRED_STORE_CONTINUE942]]:
+; CHECK-NEXT:    [[TMP1424:%.*]] = extractelement <64 x i1> [[TMP7]], i32 24
+; CHECK-NEXT:    br i1 [[TMP1424]], label %[[PRED_STORE_IF943:.*]], label %[[PRED_STORE_CONTINUE944:.*]]
+; CHECK:       [[PRED_STORE_IF943]]:
+; CHECK-NEXT:    [[TMP1425:%.*]] = add i64 [[INDEX]], 472
+; CHECK-NEXT:    [[TMP1426:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1425]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1426]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE944]]
+; CHECK:       [[PRED_STORE_CONTINUE944]]:
+; CHECK-NEXT:    [[TMP1427:%.*]] = extractelement <64 x i1> [[TMP7]], i32 25
+; CHECK-NEXT:    br i1 [[TMP1427]], label %[[PRED_STORE_IF945:.*]], label %[[PRED_STORE_CONTINUE946:.*]]
+; CHECK:       [[PRED_STORE_IF945]]:
+; CHECK-NEXT:    [[TMP1428:%.*]] = add i64 [[INDEX]], 473
+; CHECK-NEXT:    [[TMP1429:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1428]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1429]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE946]]
+; CHECK:       [[PRED_STORE_CONTINUE946]]:
+; CHECK-NEXT:    [[TMP1430:%.*]] = extractelement <64 x i1> [[TMP7]], i32 26
+; CHECK-NEXT:    br i1 [[TMP1430]], label %[[PRED_STORE_IF947:.*]], label %[[PRED_STORE_CONTINUE948:.*]]
+; CHECK:       [[PRED_STORE_IF947]]:
+; CHECK-NEXT:    [[TMP1431:%.*]] = add i64 [[INDEX]], 474
+; CHECK-NEXT:    [[TMP1432:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1431]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1432]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE948]]
+; CHECK:       [[PRED_STORE_CONTINUE948]]:
+; CHECK-NEXT:    [[TMP1433:%.*]] = extractelement <64 x i1> [[TMP7]], i32 27
+; CHECK-NEXT:    br i1 [[TMP1433]], label %[[PRED_STORE_IF949:.*]], label %[[PRED_STORE_CONTINUE950:.*]]
+; CHECK:       [[PRED_STORE_IF949]]:
+; CHECK-NEXT:    [[TMP1434:%.*]] = add i64 [[INDEX]], 475
+; CHECK-NEXT:    [[TMP1435:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1434]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1435]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE950]]
+; CHECK:       [[PRED_STORE_CONTINUE950]]:
+; CHECK-NEXT:    [[TMP1436:%.*]] = extractelement <64 x i1> [[TMP7]], i32 28
+; CHECK-NEXT:    br i1 [[TMP1436]], label %[[PRED_STORE_IF951:.*]], label %[[PRED_STORE_CONTINUE952:.*]]
+; CHECK:       [[PRED_STORE_IF951]]:
+; CHECK-NEXT:    [[TMP1437:%.*]] = add i64 [[INDEX]], 476
+; CHECK-NEXT:    [[TMP1438:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1437]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1438]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE952]]
+; CHECK:       [[PRED_STORE_CONTINUE952]]:
+; CHECK-NEXT:    [[TMP1439:%.*]] = extractelement <64 x i1> [[TMP7]], i32 29
+; CHECK-NEXT:    br i1 [[TMP1439]], label %[[PRED_STORE_IF953:.*]], label %[[PRED_STORE_CONTINUE954:.*]]
+; CHECK:       [[PRED_STORE_IF953]]:
+; CHECK-NEXT:    [[TMP1440:%.*]] = add i64 [[INDEX]], 477
+; CHECK-NEXT:    [[TMP1441:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1440]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1441]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE954]]
+; CHECK:       [[PRED_STORE_CONTINUE954]]:
+; CHECK-NEXT:    [[TMP1442:%.*]] = extractelement <64 x i1> [[TMP7]], i32 30
+; CHECK-NEXT:    br i1 [[TMP1442]], label %[[PRED_STORE_IF955:.*]], label %[[PRED_STORE_CONTINUE956:.*]]
+; CHECK:       [[PRED_STORE_IF955]]:
+; CHECK-NEXT:    [[TMP1443:%.*]] = add i64 [[INDEX]], 478
+; CHECK-NEXT:    [[TMP1444:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1443]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1444]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE956]]
+; CHECK:       [[PRED_STORE_CONTINUE956]]:
+; CHECK-NEXT:    [[TMP1445:%.*]] = extractelement <64 x i1> [[TMP7]], i32 31
+; CHECK-NEXT:    br i1 [[TMP1445]], label %[[PRED_STORE_IF957:.*]], label %[[PRED_STORE_CONTINUE958:.*]]
+; CHECK:       [[PRED_STORE_IF957]]:
+; CHECK-NEXT:    [[TMP1446:%.*]] = add i64 [[INDEX]], 479
+; CHECK-NEXT:    [[TMP1447:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1446]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1447]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE958]]
+; CHECK:       [[PRED_STORE_CONTINUE958]]:
+; CHECK-NEXT:    [[TMP1448:%.*]] = extractelement <64 x i1> [[TMP7]], i32 32
+; CHECK-NEXT:    br i1 [[TMP1448]], label %[[PRED_STORE_IF959:.*]], label %[[PRED_STORE_CONTINUE960:.*]]
+; CHECK:       [[PRED_STORE_IF959]]:
+; CHECK-NEXT:    [[TMP1449:%.*]] = add i64 [[INDEX]], 480
+; CHECK-NEXT:    [[TMP1450:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1449]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1450]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE960]]
+; CHECK:       [[PRED_STORE_CONTINUE960]]:
+; CHECK-NEXT:    [[TMP1451:%.*]] = extractelement <64 x i1> [[TMP7]], i32 33
+; CHECK-NEXT:    br i1 [[TMP1451]], label %[[PRED_STORE_IF961:.*]], label %[[PRED_STORE_CONTINUE962:.*]]
+; CHECK:       [[PRED_STORE_IF961]]:
+; CHECK-NEXT:    [[TMP1452:%.*]] = add i64 [[INDEX]], 481
+; CHECK-NEXT:    [[TMP1453:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1452]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1453]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE962]]
+; CHECK:       [[PRED_STORE_CONTINUE962]]:
+; CHECK-NEXT:    [[TMP1454:%.*]] = extractelement <64 x i1> [[TMP7]], i32 34
+; CHECK-NEXT:    br i1 [[TMP1454]], label %[[PRED_STORE_IF963:.*]], label %[[PRED_STORE_CONTINUE964:.*]]
+; CHECK:       [[PRED_STORE_IF963]]:
+; CHECK-NEXT:    [[TMP1455:%.*]] = add i64 [[INDEX]], 482
+; CHECK-NEXT:    [[TMP1456:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1455]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1456]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE964]]
+; CHECK:       [[PRED_STORE_CONTINUE964]]:
+; CHECK-NEXT:    [[TMP1457:%.*]] = extractelement <64 x i1> [[TMP7]], i32 35
+; CHECK-NEXT:    br i1 [[TMP1457]], label %[[PRED_STORE_IF965:.*]], label %[[PRED_STORE_CONTINUE966:.*]]
+; CHECK:       [[PRED_STORE_IF965]]:
+; CHECK-NEXT:    [[TMP1458:%.*]] = add i64 [[INDEX]], 483
+; CHECK-NEXT:    [[TMP1459:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1458]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1459]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE966]]
+; CHECK:       [[PRED_STORE_CONTINUE966]]:
+; CHECK-NEXT:    [[TMP1460:%.*]] = extractelement <64 x i1> [[TMP7]], i32 36
+; CHECK-NEXT:    br i1 [[TMP1460]], label %[[PRED_STORE_IF967:.*]], label %[[PRED_STORE_CONTINUE968:.*]]
+; CHECK:       [[PRED_STORE_IF967]]:
+; CHECK-NEXT:    [[TMP1461:%.*]] = add i64 [[INDEX]], 484
+; CHECK-NEXT:    [[TMP1462:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1461]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1462]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE968]]
+; CHECK:       [[PRED_STORE_CONTINUE968]]:
+; CHECK-NEXT:    [[TMP1463:%.*]] = extractelement <64 x i1> [[TMP7]], i32 37
+; CHECK-NEXT:    br i1 [[TMP1463]], label %[[PRED_STORE_IF969:.*]], label %[[PRED_STORE_CONTINUE970:.*]]
+; CHECK:       [[PRED_STORE_IF969]]:
+; CHECK-NEXT:    [[TMP1464:%.*]] = add i64 [[INDEX]], 485
+; CHECK-NEXT:    [[TMP1465:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1464]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1465]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE970]]
+; CHECK:       [[PRED_STORE_CONTINUE970]]:
+; CHECK-NEXT:    [[TMP1466:%.*]] = extractelement <64 x i1> [[TMP7]], i32 38
+; CHECK-NEXT:    br i1 [[TMP1466]], label %[[PRED_STORE_IF971:.*]], label %[[PRED_STORE_CONTINUE972:.*]]
+; CHECK:       [[PRED_STORE_IF971]]:
+; CHECK-NEXT:    [[TMP1467:%.*]] = add i64 [[INDEX]], 486
+; CHECK-NEXT:    [[TMP1468:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1467]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1468]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE972]]
+; CHECK:       [[PRED_STORE_CONTINUE972]]:
+; CHECK-NEXT:    [[TMP1469:%.*]] = extractelement <64 x i1> [[TMP7]], i32 39
+; CHECK-NEXT:    br i1 [[TMP1469]], label %[[PRED_STORE_IF973:.*]], label %[[PRED_STORE_CONTINUE974:.*]]
+; CHECK:       [[PRED_STORE_IF973]]:
+; CHECK-NEXT:    [[TMP1470:%.*]] = add i64 [[INDEX]], 487
+; CHECK-NEXT:    [[TMP1471:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1470]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1471]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE974]]
+; CHECK:       [[PRED_STORE_CONTINUE974]]:
+; CHECK-NEXT:    [[TMP1472:%.*]] = extractelement <64 x i1> [[TMP7]], i32 40
+; CHECK-NEXT:    br i1 [[TMP1472]], label %[[PRED_STORE_IF975:.*]], label %[[PRED_STORE_CONTINUE976:.*]]
+; CHECK:       [[PRED_STORE_IF975]]:
+; CHECK-NEXT:    [[TMP1473:%.*]] = add i64 [[INDEX]], 488
+; CHECK-NEXT:    [[TMP1474:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1473]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1474]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE976]]
+; CHECK:       [[PRED_STORE_CONTINUE976]]:
+; CHECK-NEXT:    [[TMP1475:%.*]] = extractelement <64 x i1> [[TMP7]], i32 41
+; CHECK-NEXT:    br i1 [[TMP1475]], label %[[PRED_STORE_IF977:.*]], label %[[PRED_STORE_CONTINUE978:.*]]
+; CHECK:       [[PRED_STORE_IF977]]:
+; CHECK-NEXT:    [[TMP1476:%.*]] = add i64 [[INDEX]], 489
+; CHECK-NEXT:    [[TMP1477:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1476]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1477]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE978]]
+; CHECK:       [[PRED_STORE_CONTINUE978]]:
+; CHECK-NEXT:    [[TMP1478:%.*]] = extractelement <64 x i1> [[TMP7]], i32 42
+; CHECK-NEXT:    br i1 [[TMP1478]], label %[[PRED_STORE_IF979:.*]], label %[[PRED_STORE_CONTINUE980:.*]]
+; CHECK:       [[PRED_STORE_IF979]]:
+; CHECK-NEXT:    [[TMP1479:%.*]] = add i64 [[INDEX]], 490
+; CHECK-NEXT:    [[TMP1480:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1479]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1480]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE980]]
+; CHECK:       [[PRED_STORE_CONTINUE980]]:
+; CHECK-NEXT:    [[TMP1481:%.*]] = extractelement <64 x i1> [[TMP7]], i32 43
+; CHECK-NEXT:    br i1 [[TMP1481]], label %[[PRED_STORE_IF981:.*]], label %[[PRED_STORE_CONTINUE982:.*]]
+; CHECK:       [[PRED_STORE_IF981]]:
+; CHECK-NEXT:    [[TMP1482:%.*]] = add i64 [[INDEX]], 491
+; CHECK-NEXT:    [[TMP1483:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1482]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1483]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE982]]
+; CHECK:       [[PRED_STORE_CONTINUE982]]:
+; CHECK-NEXT:    [[TMP1484:%.*]] = extractelement <64 x i1> [[TMP7]], i32 44
+; CHECK-NEXT:    br i1 [[TMP1484]], label %[[PRED_STORE_IF983:.*]], label %[[PRED_STORE_CONTINUE984:.*]]
+; CHECK:       [[PRED_STORE_IF983]]:
+; CHECK-NEXT:    [[TMP1485:%.*]] = add i64 [[INDEX]], 492
+; CHECK-NEXT:    [[TMP1486:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1485]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1486]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE984]]
+; CHECK:       [[PRED_STORE_CONTINUE984]]:
+; CHECK-NEXT:    [[TMP1487:%.*]] = extractelement <64 x i1> [[TMP7]], i32 45
+; CHECK-NEXT:    br i1 [[TMP1487]], label %[[PRED_STORE_IF985:.*]], label %[[PRED_STORE_CONTINUE986:.*]]
+; CHECK:       [[PRED_STORE_IF985]]:
+; CHECK-NEXT:    [[TMP1488:%.*]] = add i64 [[INDEX]], 493
+; CHECK-NEXT:    [[TMP1489:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1488]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1489]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE986]]
+; CHECK:       [[PRED_STORE_CONTINUE986]]:
+; CHECK-NEXT:    [[TMP1490:%.*]] = extractelement <64 x i1> [[TMP7]], i32 46
+; CHECK-NEXT:    br i1 [[TMP1490]], label %[[PRED_STORE_IF987:.*]], label %[[PRED_STORE_CONTINUE988:.*]]
+; CHECK:       [[PRED_STORE_IF987]]:
+; CHECK-NEXT:    [[TMP1491:%.*]] = add i64 [[INDEX]], 494
+; CHECK-NEXT:    [[TMP1492:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1491]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1492]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE988]]
+; CHECK:       [[PRED_STORE_CONTINUE988]]:
+; CHECK-NEXT:    [[TMP1493:%.*]] = extractelement <64 x i1> [[TMP7]], i32 47
+; CHECK-NEXT:    br i1 [[TMP1493]], label %[[PRED_STORE_IF989:.*]], label %[[PRED_STORE_CONTINUE990:.*]]
+; CHECK:       [[PRED_STORE_IF989]]:
+; CHECK-NEXT:    [[TMP1494:%.*]] = add i64 [[INDEX]], 495
+; CHECK-NEXT:    [[TMP1495:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1494]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1495]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE990]]
+; CHECK:       [[PRED_STORE_CONTINUE990]]:
+; CHECK-NEXT:    [[TMP1496:%.*]] = extractelement <64 x i1> [[TMP7]], i32 48
+; CHECK-NEXT:    br i1 [[TMP1496]], label %[[PRED_STORE_IF991:.*]], label %[[PRED_STORE_CONTINUE992:.*]]
+; CHECK:       [[PRED_STORE_IF991]]:
+; CHECK-NEXT:    [[TMP1497:%.*]] = add i64 [[INDEX]], 496
+; CHECK-NEXT:    [[TMP1498:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1497]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1498]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE992]]
+; CHECK:       [[PRED_STORE_CONTINUE992]]:
+; CHECK-NEXT:    [[TMP1499:%.*]] = extractelement <64 x i1> [[TMP7]], i32 49
+; CHECK-NEXT:    br i1 [[TMP1499]], label %[[PRED_STORE_IF993:.*]], label %[[PRED_STORE_CONTINUE994:.*]]
+; CHECK:       [[PRED_STORE_IF993]]:
+; CHECK-NEXT:    [[TMP1500:%.*]] = add i64 [[INDEX]], 497
+; CHECK-NEXT:    [[TMP1501:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1500]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1501]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE994]]
+; CHECK:       [[PRED_STORE_CONTINUE994]]:
+; CHECK-NEXT:    [[TMP1502:%.*]] = extractelement <64 x i1> [[TMP7]], i32 50
+; CHECK-NEXT:    br i1 [[TMP1502]], label %[[PRED_STORE_IF995:.*]], label %[[PRED_STORE_CONTINUE996:.*]]
+; CHECK:       [[PRED_STORE_IF995]]:
+; CHECK-NEXT:    [[TMP1503:%.*]] = add i64 [[INDEX]], 498
+; CHECK-NEXT:    [[TMP1504:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1503]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1504]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE996]]
+; CHECK:       [[PRED_STORE_CONTINUE996]]:
+; CHECK-NEXT:    [[TMP1505:%.*]] = extractelement <64 x i1> [[TMP7]], i32 51
+; CHECK-NEXT:    br i1 [[TMP1505]], label %[[PRED_STORE_IF997:.*]], label %[[PRED_STORE_CONTINUE998:.*]]
+; CHECK:       [[PRED_STORE_IF997]]:
+; CHECK-NEXT:    [[TMP1506:%.*]] = add i64 [[INDEX]], 499
+; CHECK-NEXT:    [[TMP1507:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1506]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1507]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE998]]
+; CHECK:       [[PRED_STORE_CONTINUE998]]:
+; CHECK-NEXT:    [[TMP1508:%.*]] = extractelement <64 x i1> [[TMP7]], i32 52
+; CHECK-NEXT:    br i1 [[TMP1508]], label %[[PRED_STORE_IF999:.*]], label %[[PRED_STORE_CONTINUE1000:.*]]
+; CHECK:       [[PRED_STORE_IF999]]:
+; CHECK-NEXT:    [[TMP1509:%.*]] = add i64 [[INDEX]], 500
+; CHECK-NEXT:    [[TMP1510:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1509]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1510]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1000]]
+; CHECK:       [[PRED_STORE_CONTINUE1000]]:
+; CHECK-NEXT:    [[TMP1511:%.*]] = extractelement <64 x i1> [[TMP7]], i32 53
+; CHECK-NEXT:    br i1 [[TMP1511]], label %[[PRED_STORE_IF1001:.*]], label %[[PRED_STORE_CONTINUE1002:.*]]
+; CHECK:       [[PRED_STORE_IF1001]]:
+; CHECK-NEXT:    [[TMP1512:%.*]] = add i64 [[INDEX]], 501
+; CHECK-NEXT:    [[TMP1513:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1512]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1513]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1002]]
+; CHECK:       [[PRED_STORE_CONTINUE1002]]:
+; CHECK-NEXT:    [[TMP1514:%.*]] = extractelement <64 x i1> [[TMP7]], i32 54
+; CHECK-NEXT:    br i1 [[TMP1514]], label %[[PRED_STORE_IF1003:.*]], label %[[PRED_STORE_CONTINUE1004:.*]]
+; CHECK:       [[PRED_STORE_IF1003]]:
+; CHECK-NEXT:    [[TMP1515:%.*]] = add i64 [[INDEX]], 502
+; CHECK-NEXT:    [[TMP1516:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1515]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1516]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1004]]
+; CHECK:       [[PRED_STORE_CONTINUE1004]]:
+; CHECK-NEXT:    [[TMP1517:%.*]] = extractelement <64 x i1> [[TMP7]], i32 55
+; CHECK-NEXT:    br i1 [[TMP1517]], label %[[PRED_STORE_IF1005:.*]], label %[[PRED_STORE_CONTINUE1006:.*]]
+; CHECK:       [[PRED_STORE_IF1005]]:
+; CHECK-NEXT:    [[TMP1518:%.*]] = add i64 [[INDEX]], 503
+; CHECK-NEXT:    [[TMP1519:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1518]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1519]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1006]]
+; CHECK:       [[PRED_STORE_CONTINUE1006]]:
+; CHECK-NEXT:    [[TMP1520:%.*]] = extractelement <64 x i1> [[TMP7]], i32 56
+; CHECK-NEXT:    br i1 [[TMP1520]], label %[[PRED_STORE_IF1007:.*]], label %[[PRED_STORE_CONTINUE1008:.*]]
+; CHECK:       [[PRED_STORE_IF1007]]:
+; CHECK-NEXT:    [[TMP1521:%.*]] = add i64 [[INDEX]], 504
+; CHECK-NEXT:    [[TMP1522:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1521]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1522]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1008]]
+; CHECK:       [[PRED_STORE_CONTINUE1008]]:
+; CHECK-NEXT:    [[TMP1523:%.*]] = extractelement <64 x i1> [[TMP7]], i32 57
+; CHECK-NEXT:    br i1 [[TMP1523]], label %[[PRED_STORE_IF1009:.*]], label %[[PRED_STORE_CONTINUE1010:.*]]
+; CHECK:       [[PRED_STORE_IF1009]]:
+; CHECK-NEXT:    [[TMP1524:%.*]] = add i64 [[INDEX]], 505
+; CHECK-NEXT:    [[TMP1525:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1524]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1525]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1010]]
+; CHECK:       [[PRED_STORE_CONTINUE1010]]:
+; CHECK-NEXT:    [[TMP1526:%.*]] = extractelement <64 x i1> [[TMP7]], i32 58
+; CHECK-NEXT:    br i1 [[TMP1526]], label %[[PRED_STORE_IF1011:.*]], label %[[PRED_STORE_CONTINUE1012:.*]]
+; CHECK:       [[PRED_STORE_IF1011]]:
+; CHECK-NEXT:    [[TMP1527:%.*]] = add i64 [[INDEX]], 506
+; CHECK-NEXT:    [[TMP1528:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1527]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1528]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1012]]
+; CHECK:       [[PRED_STORE_CONTINUE1012]]:
+; CHECK-NEXT:    [[TMP1529:%.*]] = extractelement <64 x i1> [[TMP7]], i32 59
+; CHECK-NEXT:    br i1 [[TMP1529]], label %[[PRED_STORE_IF1013:.*]], label %[[PRED_STORE_CONTINUE1014:.*]]
+; CHECK:       [[PRED_STORE_IF1013]]:
+; CHECK-NEXT:    [[TMP1530:%.*]] = add i64 [[INDEX]], 507
+; CHECK-NEXT:    [[TMP1531:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1530]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1531]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1014]]
+; CHECK:       [[PRED_STORE_CONTINUE1014]]:
+; CHECK-NEXT:    [[TMP1532:%.*]] = extractelement <64 x i1> [[TMP7]], i32 60
+; CHECK-NEXT:    br i1 [[TMP1532]], label %[[PRED_STORE_IF1015:.*]], label %[[PRED_STORE_CONTINUE1016:.*]]
+; CHECK:       [[PRED_STORE_IF1015]]:
+; CHECK-NEXT:    [[TMP1533:%.*]] = add i64 [[INDEX]], 508
+; CHECK-NEXT:    [[TMP1534:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1533]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1534]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1016]]
+; CHECK:       [[PRED_STORE_CONTINUE1016]]:
+; CHECK-NEXT:    [[TMP1535:%.*]] = extractelement <64 x i1> [[TMP7]], i32 61
+; CHECK-NEXT:    br i1 [[TMP1535]], label %[[PRED_STORE_IF1017:.*]], label %[[PRED_STORE_CONTINUE1018:.*]]
+; CHECK:       [[PRED_STORE_IF1017]]:
+; CHECK-NEXT:    [[TMP1536:%.*]] = add i64 [[INDEX]], 509
+; CHECK-NEXT:    [[TMP1537:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1536]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1537]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1018]]
+; CHECK:       [[PRED_STORE_CONTINUE1018]]:
+; CHECK-NEXT:    [[TMP1538:%.*]] = extractelement <64 x i1> [[TMP7]], i32 62
+; CHECK-NEXT:    br i1 [[TMP1538]], label %[[PRED_STORE_IF1019:.*]], label %[[PRED_STORE_CONTINUE1020:.*]]
+; CHECK:       [[PRED_STORE_IF1019]]:
+; CHECK-NEXT:    [[TMP1539:%.*]] = add i64 [[INDEX]], 510
+; CHECK-NEXT:    [[TMP1540:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1539]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1540]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1020]]
+; CHECK:       [[PRED_STORE_CONTINUE1020]]:
+; CHECK-NEXT:    [[TMP1541:%.*]] = extractelement <64 x i1> [[TMP7]], i32 63
+; CHECK-NEXT:    br i1 [[TMP1541]], label %[[PRED_STORE_IF1021:.*]], label %[[PRED_STORE_CONTINUE1022]]
+; CHECK:       [[PRED_STORE_IF1021]]:
+; CHECK-NEXT:    [[TMP1542:%.*]] = add i64 [[INDEX]], 511
+; CHECK-NEXT:    [[TMP1543:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1542]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1543]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1022]]
+; CHECK:       [[PRED_STORE_CONTINUE1022]]:
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <64 x i64> [[STEP_ADD_7]], splat (i64 64)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 512
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 512, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 15
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 15
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
index 252d6b063eee9..d97501b7b52a7 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
@@ -1,20 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
+; RUN: opt -p loop-vectorize -force-vector-width=8 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
 
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
-target triple = "aarch64-unknown-linux-gnu"
-
-define dso_local void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_small_tc_i8(
-; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_small_tc_i8(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 14)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 14)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -80,7 +77,7 @@ define dso_local void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p)
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -113,17 +110,17 @@ end:
   ret void
 }
 
-define dso_local void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_upper_limit_i8(
-; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_upper_limit_i8(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 254)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 -2)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -189,7 +186,7 @@ define dso_local void @canonical_upper_limit_i8(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -222,17 +219,17 @@ end:
   ret void
 }
 
-define dso_local void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_lower_limit_i16(
-; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_lower_limit_i16(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 256)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i16> [[VEC_IND]], splat (i16 256)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -298,7 +295,7 @@ define dso_local void @canonical_lower_limit_i16(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 264
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -331,17 +328,17 @@ end:
   ret void
 }
 
-define dso_local void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_upper_limit_i16(
-; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_upper_limit_i16(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 65534)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i16> [[VEC_IND]], splat (i16 -2)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -407,7 +404,7 @@ define dso_local void @canonical_upper_limit_i16(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -440,17 +437,17 @@ end:
   ret void
 }
 
-define dso_local void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_lower_limit_i32(
-; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_lower_limit_i32(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 65536)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], splat (i32 65536)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -516,7 +513,7 @@ define dso_local void @canonical_lower_limit_i32(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65544
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -549,17 +546,17 @@ end:
   ret void
 }
 
-define dso_local void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_upper_limit_i32(
-; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_upper_limit_i32(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 4294967294)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], splat (i32 -2)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -625,7 +622,7 @@ define dso_local void @canonical_upper_limit_i32(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967296
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -658,9 +655,9 @@ end:
   ret void
 }
 
-define dso_local void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_lower_limit_i64(
-; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_lower_limit_i64(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
@@ -767,9 +764,9 @@ end:
   ret void
 }
 
-define dso_local void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_upper_limit_i64(
-; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_upper_limit_i64(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
@@ -876,6 +873,115 @@ end:
   ret void
 }
 
+define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_lower_limit_i128(
+; CHECK-SAME: ptr noundef writeonly captures(none) [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i256 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i128> [ <i128 0, i128 1, i128 2, i128 3, i128 4, i128 5, i128 6, i128 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i128> [[VEC_IND]], splat (i128 18446744073709551616)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i256 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i256 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i256 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i256 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i256 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i256 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i256 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i256 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i256 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i128> [[VEC_IND]], splat (i128 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i256 [[INDEX_NEXT]], 18446744073709551624
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i256 [ 18446744073709551624, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i256 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i256 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i256 [[IV_NEXT]], 18446744073709551617
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i256 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i256 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i256 %iv, 1
+  %cond = icmp eq i256 %iv.next, 18446744073709551617
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
 ;.
 ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -895,4 +1001,6 @@ end:
 ; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
 ; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
+; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
+; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
index 892ddccbc93b4..a73958cb30543 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
@@ -14,8 +14,8 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
@@ -50,7 +50,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
 ; CHECK:       [[PRED_STORE_CONTINUE6]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
 ; CHECK-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:



More information about the llvm-commits mailing list