[llvm] [LV] Optimize VPWidenIntOrFpInductionRecipe for known TC (PR #118828)

Hari Limaye via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 19 03:49:48 PST 2024


https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/118828

>From 95613947be64f8f83ddd71f0d419bb8f722af065 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 5 Dec 2024 16:14:46 +0000
Subject: [PATCH 1/6] [LV] Pre-commit tests for optimizing induction variable
 width

---
 ...folding-optimize-vector-induction-width.ll | 898 ++++++++++++++++++
 1 file changed, 898 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll

diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
new file mode 100644
index 00000000000000..252d6b063eee9b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
@@ -0,0 +1,898 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+define dso_local void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) { ; Stores i16 1 to p[0..14]: a single-block loop with a constant trip count of 15.
+; CHECK-LABEL: define dso_local void @canonical_small_tc_i8(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 14)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 15
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; NOTE(review): the "_i8" suffix presumably means the induction values are expected to fit in i8 - confirm against the follow-up patch.
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, incremented by 1 below
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 15 ; exit once 15 iterations have run (last %iv is 14)
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) { ; Stores i16 1 to p[0..254]: a single-block loop with a constant trip count of 255.
+; CHECK-LABEL: define dso_local void @canonical_upper_limit_i8(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 254)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 256, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 255
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; NOTE(review): "upper_limit_i8" presumably marks the largest trip count for which an i8 induction is expected to suffice - confirm against the follow-up patch.
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, incremented by 1 below
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 255 ; exit once 255 iterations have run (last %iv is 254)
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) { ; Stores i16 1 to p[0..256]: a single-block loop with a constant trip count of 257.
+; CHECK-LABEL: define dso_local void @canonical_lower_limit_i16(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 256)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 264
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 264, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 257
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; NOTE(review): "lower_limit_i16" presumably marks the smallest trip count for which the induction is expected to need i16 rather than i8 - confirm against the follow-up patch.
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, incremented by 1 below
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 257 ; exit once 257 iterations have run (last %iv is 256)
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) { ; Stores i16 1 to p[0..65534]: a single-block loop with a constant trip count of 65535.
+; CHECK-LABEL: define dso_local void @canonical_upper_limit_i16(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 65534)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 65536, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 65535
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; NOTE(review): "upper_limit_i16" presumably marks the largest trip count for which an i16 induction is expected to suffice - confirm against the follow-up patch.
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, incremented by 1 below
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 65535 ; exit once 65535 iterations have run (last %iv is 65534)
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) { ; Stores i16 1 to p[0..65536]: a single-block loop with a constant trip count of 65537.
+; CHECK-LABEL: define dso_local void @canonical_lower_limit_i32(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 65536)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65544
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 65544, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 65537
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry: ; NOTE(review): "lower_limit_i32" presumably marks the smallest trip count for which the induction is expected to need i32 rather than i16 - confirm against the follow-up patch.
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; i64 induction variable: starts at 0, incremented by 1 below
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 65537 ; exit once 65537 iterations have run (last %iv is 65536)
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_upper_limit_i32(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 4294967294)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967296
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967296, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 4294967295
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 4294967295
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_lower_limit_i64(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 4294967296)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967304
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967304, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 4294967297
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 4294967297
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+define dso_local void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define dso_local void @canonical_upper_limit_i64(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 -2)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP6]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP9]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP12]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP14]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP15]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP17]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP18]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP20]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP21]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP23]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP24]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], -1
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 18446744073709551615
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
+; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
+; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
+; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
+;.

>From ab915f742d21920974ea8845edaef2ba055c329c Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 5 Dec 2024 16:22:15 +0000
Subject: [PATCH 2/6] [LV] Optimize VPWidenIntOrFpInductionRecipe for known TC

Optimize the IR generated for a VPWidenIntOrFpInductionRecipe to use the
narrowest type necessary, when the trip-count of a loop is known to be
constant and the only use of the recipe is the header-mask comparison
against the backedge-taken count.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  3 +
 llvm/lib/Transforms/Vectorize/VPlan.h         |  3 +
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 54 +++++++++++
 .../Transforms/Vectorize/VPlanTransforms.h    |  4 +
 .../AArch64/conditional-branches-cost.ll      | 18 ++--
 .../pr45679-fold-tail-by-masking.ll           | 12 +--
 .../LoopVectorize/reduction-inloop-pred.ll    | 90 +++++++++----------
 .../LoopVectorize/reduction-predselect.ll     | 54 +++++------
 .../tail-folding-alloca-in-loop.ll            |  6 +-
 ...folding-optimize-vector-induction-width.ll | 36 ++++----
 .../LoopVectorize/tail-folding-switch.ll      |  6 +-
 11 files changed, 175 insertions(+), 111 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c7c044a042719..3e6774ff14ac1f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7657,6 +7657,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
                               OrigLoop->getHeader()->getContext());
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
 
+  auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
+  VPlanTransforms::optimizeForTCAndVF(BestVPlan, TC, BestVF);
+
   LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF
                     << ", UF=" << BestUF << '\n');
   BestVPlan.setName("Final VPlan");
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e1d828f038f9a2..809efc5988930b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2150,6 +2150,9 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
   VPValue *getStepValue() { return getOperand(1); }
   const VPValue *getStepValue() const { return getOperand(1); }
 
+  /// Update the step value of the recipe.
+  void setStepValue(VPValue *V) { setOperand(1, V); }
+
   VPValue *getVFValue() { return getOperand(2); }
   const VPValue *getVFValue() const { return getOperand(2); }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cee83d1015b536..6e27b1b5055446 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -661,6 +661,60 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
+void VPlanTransforms::optimizeForTCAndVF(VPlan &Plan, unsigned TC,
+                                         ElementCount BestVF) {
+  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
+  // Bail out for a zero TC or a non-fixed VF.
+  if (!TC || !BestVF.isFixed())
+    return;
+
+  // Narrowest power-of-2 integer width (at least 8 bits) able to hold the
+  // largest lane value of the canonical wide IV: TC rounded up to a multiple
+  // of VF, minus one. Significant bits are Log2_64 + 1; Log2_64_Ceil would
+  // give only N bits for an exact power of two 2^N, which needs N + 1.
+  uint64_t Width = BestVF.getKnownMinValue();
+  uint64_t MaxVal = alignTo(TC, Width) - 1;
+  unsigned MaxActiveBits = MaxVal ? Log2_64(MaxVal) + 1 : 0;
+  unsigned NewBitWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);
+  LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();
+  auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth);
+
+  bool MadeChange = false;
+  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+    auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+    // Only a canonical wide IV with exactly one unique user qualifies; an IV
+    // without users is skipped so user_begin() is never dereferenced at end.
+    if (!WideIV || !WideIV->isCanonical() || WideIV->getNumUsers() == 0 ||
+        WideIV->hasMoreThanOneUniqueUser())
+      continue;
+
+    // Currently only handle cases where the single user is a header-mask
+    // comparison with the backedge-taken-count.
+    VPValue *Bound;
+    using namespace VPlanPatternMatch;
+    auto *Cmp = dyn_cast<VPInstruction>(*WideIV->user_begin());
+    if (!Cmp ||
+        !match(Cmp, m_Binary<Instruction::ICmp>(m_Specific(WideIV),
+                                                m_VPValue(Bound))) ||
+        Bound != Plan.getOrCreateBackedgeTakenCount())
+      continue;
+
+    // Already at the computed width; nothing to narrow.
+    if (NewIVTy == WideIV->getScalarType())
+      continue;
+
+    // Update IV operands and comparison bound to use new narrower type.
+    WideIV->setStartValue(Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0)));
+    WideIV->setStepValue(Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1)));
+    Cmp->setOperand(1, Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, TC - 1)));
+    MadeChange = true;
+  }
+
+  if (MadeChange)
+    Plan.setVF(BestVF);
+}
+
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                          unsigned BestUF,
                                          PredicatedScalarEvolution &PSE) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 1491e0a8df04d5..ba772cf385d560 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -15,6 +15,7 @@
 
 #include "VPlan.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/Support/TypeSize.h"
 
 namespace llvm {
 
@@ -58,6 +59,9 @@ struct VPlanTransforms {
                                  unsigned BestUF,
                                  PredicatedScalarEvolution &PSE);
 
+  /// Narrow \p Plan's canonical wide IV to fit trip count \p TC at \p BestVF.
+  static void optimizeForTCAndVF(VPlan &Plan, unsigned TC, ElementCount BestVF);
+
   /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
   /// optimizations, dead recipe removal, replicate region optimizations and
   /// block merging.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 69560305706364..bec30b3dcb5289 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -389,8 +389,8 @@ define void @latch_branch_cost(ptr %dst) {
 ; PRED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; PRED:       vector.body:
 ; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; PRED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 99)
+; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; PRED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 99)
 ; PRED-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; PRED-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; PRED:       pred.store.if:
@@ -456,7 +456,7 @@ define void @latch_branch_cost(ptr %dst) {
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; PRED:       pred.store.continue14:
 ; PRED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; PRED-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104
 ; PRED-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; PRED:       middle.block:
@@ -903,9 +903,9 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; DEFAULT:       vector.body:
 ; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
-; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
+; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6)
+; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6)
 ; DEFAULT-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
 ; DEFAULT-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DEFAULT:       pred.store.if:
@@ -978,7 +978,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; DEFAULT-NEXT:    store i8 [[TMP33]], ptr [[TMP32]], align 1
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE14]]
 ; DEFAULT:       pred.store.continue14:
-; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; DEFAULT-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; DEFAULT:       middle.block:
@@ -1005,9 +1005,9 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; PRED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; PRED:       vector.body:
 ; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
-; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
+; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
 ; PRED-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; PRED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6)
+; PRED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6)
 ; PRED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
 ; PRED-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; PRED:       pred.store.if:
@@ -1080,7 +1080,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; PRED-NEXT:    store i8 [[TMP33]], ptr [[TMP32]], align 1
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE14]]
 ; PRED:       pred.store.continue14:
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; PRED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; PRED-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; PRED:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index c301ef3c5319a2..b207cca03c90f2 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -18,8 +18,8 @@ define void @pr45679(ptr %A) optsize {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 13)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 13)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -53,7 +53,7 @@ define void @pr45679(ptr %A) optsize {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -213,8 +213,8 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 13)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 13)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -252,7 +252,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index 8e132ed8399cd6..c76057a18bf3cd 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -11,9 +11,9 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -57,7 +57,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -97,10 +97,10 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -166,7 +166,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]])
 ; CHECK-NEXT:    [[TMP48]] = add i32 [[TMP47]], [[TMP45]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -212,9 +212,9 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -261,7 +261,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]])
 ; CHECK-NEXT:    [[TMP29]] = add i32 [[TMP28]], [[TMP26]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -302,10 +302,10 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -371,7 +371,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
 ; CHECK-NEXT:    [[TMP48]] = mul i32 [[TMP47]], [[TMP45]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -417,10 +417,10 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -484,7 +484,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]])
 ; CHECK-NEXT:    [[TMP46]] = add i32 [[TMP45]], [[TMP43]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -530,9 +530,9 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -595,7 +595,7 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = mul i32 [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -638,9 +638,9 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -703,7 +703,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = and i32 [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -746,9 +746,9 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -809,7 +809,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]])
 ; CHECK-NEXT:    [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -852,9 +852,9 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -915,7 +915,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]])
 ; CHECK-NEXT:    [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       middle.block:
@@ -958,9 +958,9 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1021,7 +1021,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1064,9 +1064,9 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1129,7 +1129,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1172,9 +1172,9 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1218,7 +1218,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1259,9 +1259,9 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1305,7 +1305,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1438,9 +1438,9 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
@@ -1488,7 +1488,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1534,9 +1534,9 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1583,7 +1583,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
index f95be1a221e73b..3d40707a5e97ec 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
@@ -11,9 +11,9 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -56,7 +56,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -203,9 +203,9 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -265,7 +265,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -308,9 +308,9 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -370,7 +370,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> splat (i32 -1)
 ; CHECK-NEXT:    [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -413,9 +413,9 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -475,7 +475,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42]] = or <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -518,9 +518,9 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -580,7 +580,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42]] = xor <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -623,9 +623,9 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -685,7 +685,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = fadd fast <4 x float> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -728,9 +728,9 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -790,7 +790,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = fmul fast <4 x float> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -833,9 +833,9 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -878,7 +878,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
 ; CHECK-NEXT:    [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       middle.block:
@@ -919,9 +919,9 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -964,7 +964,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
 ; CHECK-NEXT:    [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index 56fc8eac35bad6..3a54244a41017d 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -10,8 +10,8 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 200)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -49,7 +49,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
 ; CHECK:       [[PRED_STORE_CONTINUE6]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204
 ; CHECK-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
index 252d6b063eee9b..ae20e7d823a583 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
@@ -13,8 +13,8 @@ define dso_local void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p)
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 14)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 14)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -80,7 +80,7 @@ define dso_local void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p)
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -122,8 +122,8 @@ define dso_local void @canonical_upper_limit_i8(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 254)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 -2)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -189,7 +189,7 @@ define dso_local void @canonical_upper_limit_i8(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -231,8 +231,8 @@ define dso_local void @canonical_lower_limit_i16(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 256)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i16> [[VEC_IND]], splat (i16 256)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -298,7 +298,7 @@ define dso_local void @canonical_lower_limit_i16(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 264
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -340,8 +340,8 @@ define dso_local void @canonical_upper_limit_i16(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 65534)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i16> [[VEC_IND]], splat (i16 -2)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -407,7 +407,7 @@ define dso_local void @canonical_upper_limit_i16(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -449,8 +449,8 @@ define dso_local void @canonical_lower_limit_i32(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 65536)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], splat (i32 65536)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -516,7 +516,7 @@ define dso_local void @canonical_lower_limit_i32(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65544
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -558,8 +558,8 @@ define dso_local void @canonical_upper_limit_i32(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 4294967294)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], splat (i32 -2)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -625,7 +625,7 @@ define dso_local void @canonical_upper_limit_i32(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
 ; CHECK:       [[PRED_STORE_CONTINUE14]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967296
 ; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
index 31732f027f6dd4..59d382c5cce985 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
@@ -13,8 +13,8 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
@@ -50,7 +50,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
 ; CHECK:       [[PRED_STORE_CONTINUE6]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
 ; CHECK-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:

>From 14574f70927c7ddc7f82c0a7fb37a8c2671f6841 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Fri, 6 Dec 2024 12:52:31 +0000
Subject: [PATCH 3/6] Fix regression tests for other Targets

---
 .../SystemZ/predicated-first-order-recurrence.ll            | 6 +++---
 .../LoopVectorize/X86/consecutive-ptr-uniforms.ll           | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
index d0754f1c2bb555..4fbcab9792c1b8 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
@@ -19,10 +19,10 @@ define void @func_21() {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
 ; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -59,7 +59,7 @@ define void @func_21() {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index e685a83d9ccbb2..e26221eeefd2d8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -86,8 +86,8 @@ attributes #0 = { "target-cpu"="knl" }
 ; FORCE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FORCE:       vector.body:
 ; FORCE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
-; FORCE-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
-; FORCE-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 2)
+; FORCE-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
+; FORCE-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 2)
 ; FORCE-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; FORCE-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FORCE:       pred.store.if:
@@ -103,7 +103,7 @@ attributes #0 = { "target-cpu"="knl" }
 ; FORCE-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; FORCE:       pred.store.continue2:
 ; FORCE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; FORCE-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; FORCE-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
 ; FORCE-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
 ; FORCE-NEXT:    br i1 [[TMP15]], label {{%.*}}, label [[VECTOR_BODY]]
 ;

>From eaa7c25c62ab7c6ab373ae12e3b3bdfbcba68745 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 17 Dec 2024 14:11:46 +0000
Subject: [PATCH 4/6] Address review comments

- Fix incorrect comment
- Consider UF as well as VF
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp   |  2 +-
 llvm/lib/Transforms/Vectorize/VPlan.h             |  2 +-
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 14 +++++++++-----
 llvm/lib/Transforms/Vectorize/VPlanTransforms.h   |  5 +++--
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3e6774ff14ac1f..e8b092e0ddc83b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7658,7 +7658,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
 
   auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
-  VPlanTransforms::optimizeForTCAndVF(BestVPlan, TC, BestVF);
+  VPlanTransforms::optimizeForTCAndVFAndUF(BestVPlan, TC, BestVF, BestUF);
 
   LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF
                     << ", UF=" << BestUF << '\n');
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 809efc5988930b..49300831e1c38a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2150,7 +2150,7 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
   VPValue *getStepValue() { return getOperand(1); }
   const VPValue *getStepValue() const { return getOperand(1); }
 
-  /// Update the start value of the recipe.
+  /// Update the step value of the recipe.
   void setStepValue(VPValue *V) { setOperand(1, V); }
 
   VPValue *getVFValue() { return getOperand(2); }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6e27b1b5055446..9cd7e62f2f5e86 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -661,14 +661,16 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
-void VPlanTransforms::optimizeForTCAndVF(VPlan &Plan, unsigned TC,
-                                         ElementCount BestVF) {
+void VPlanTransforms::optimizeForTCAndVFAndUF(VPlan &Plan, unsigned TC,
+                                              ElementCount BestVF,
+                                              unsigned BestUF) {
   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
+  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
   if (!TC || !BestVF.isFixed())
     return;
 
-  // Calculate the widest type required for known TC and VF.
-  uint64_t Width = BestVF.getKnownMinValue();
+  // Calculate the widest type required for known TC, VF and UF.
+  uint64_t Width = BestVF.getKnownMinValue() * BestUF;
   uint64_t MaxVal = alignTo(TC, Width) - 1;
   unsigned MaxActiveBits = Log2_64_Ceil(MaxVal);
   unsigned NewBitWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);
@@ -711,8 +713,10 @@ void VPlanTransforms::optimizeForTCAndVF(VPlan &Plan, unsigned TC,
     MadeChange = true;
   }
 
-  if (MadeChange)
+  if (MadeChange) {
     Plan.setVF(BestVF);
+    Plan.setUF(BestUF);
+  }
 }
 
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ba772cf385d560..948665b89adc00 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -59,8 +59,9 @@ struct VPlanTransforms {
                                  unsigned BestUF,
                                  PredicatedScalarEvolution &PSE);
 
-  /// Optimize \p Plan based on \p TC and \p BestVF.
-  static void optimizeForTCAndVF(VPlan &Plan, unsigned TC, ElementCount BestVF);
+  /// Optimize \p Plan based on \p TC, \p BestVF and \p BestUF.
+  static void optimizeForTCAndVFAndUF(VPlan &Plan, unsigned TC,
+                                      ElementCount BestVF, unsigned BestUF);
 
   /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
   /// optimizations, dead recipe removal, replicate region optimizations and

>From ef4d915fecc68dd0cdf64b38b2747f7bb5b5b22b Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 18 Dec 2024 12:00:19 +0000
Subject: [PATCH 5/6] Address review comments

- Move inside optimizeForVFAndUF
- Merge continue checks
- Remove redundant dyn_cast + check
- Remove dso_local and target triple from regression test
- Add test for interleaving
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |    3 -
 .../Transforms/Vectorize/VPlanTransforms.cpp  |   76 +-
 .../Transforms/Vectorize/VPlanTransforms.h    |    5 -
 ...timize-vector-induction-width-unrolling.ll | 4162 +++++++++++++++++
 ...folding-optimize-vector-induction-width.ll |   37 +-
 5 files changed, 4223 insertions(+), 60 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width-unrolling.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e8b092e0ddc83b..3c7c044a042719 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7657,9 +7657,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
                               OrigLoop->getHeader()->getContext());
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
 
-  auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
-  VPlanTransforms::optimizeForTCAndVFAndUF(BestVPlan, TC, BestVF, BestUF);
-
   LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF
                     << ", UF=" << BestUF << '\n');
   BestVPlan.setName("Final VPlan");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9cd7e62f2f5e86..ab1c1c10ab1f73 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -27,6 +27,8 @@
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/TypeSize.h"
 
 using namespace llvm;
 
@@ -661,17 +663,20 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
-void VPlanTransforms::optimizeForTCAndVFAndUF(VPlan &Plan, unsigned TC,
-                                              ElementCount BestVF,
-                                              unsigned BestUF) {
-  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
-  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+/// Optimize the width of vector induction variables based on \p TC, \p BestVF
+/// and \p BestUF.
+static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
+                                                     ElementCount BestVF,
+                                                     unsigned BestUF) {
+  auto *TC = dyn_cast_if_present<ConstantInt>(
+      Plan.getTripCount()->getUnderlyingValue());
   if (!TC || !BestVF.isFixed())
-    return;
+    return false;
 
   // Calculate the widest type required for known TC, VF and UF.
+  uint64_t TCVal = TC->getZExtValue();
   uint64_t Width = BestVF.getKnownMinValue() * BestUF;
-  uint64_t MaxVal = alignTo(TC, Width) - 1;
+  uint64_t MaxVal = alignTo(TCVal, Width) - 1;
   unsigned MaxActiveBits = Log2_64_Ceil(MaxVal);
   unsigned NewBitWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);
   LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();
@@ -682,48 +687,39 @@ void VPlanTransforms::optimizeForTCAndVFAndUF(VPlan &Plan, unsigned TC,
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
     auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
-    if (!WideIV || !WideIV->isCanonical())
-      continue;
-
-    if (WideIV->hasMoreThanOneUniqueUser())
+    if (!WideIV || !WideIV->isCanonical() ||
+        WideIV->hasMoreThanOneUniqueUser() ||
+        NewIVTy == WideIV->getScalarType())
       continue;
 
     // Currently only handle cases where the single user is a header-mask
     // comparison with the backedge-taken-count.
     VPValue *Bound;
     using namespace VPlanPatternMatch;
-    auto *Cmp = dyn_cast<VPInstruction>(*WideIV->user_begin());
-    if (!Cmp ||
-        !match(Cmp, m_Binary<Instruction::ICmp>(m_Specific(WideIV),
-                                                m_VPValue(Bound))) ||
+    if (!match(*WideIV->user_begin(),
+               m_Binary<Instruction::ICmp>(m_Specific(WideIV),
+                                           m_VPValue(Bound))) ||
         Bound != Plan.getOrCreateBackedgeTakenCount())
       continue;
 
-    if (NewIVTy == WideIV->getScalarType())
-      continue;
-
     // Update IV operands and comparison bound to use new narrower type.
     auto *NewStart = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0));
     WideIV->setStartValue(NewStart);
     auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
     WideIV->setStepValue(NewStep);
-    auto *NewBound = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, TC - 1));
+    auto *NewBound = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, TCVal - 1));
+    auto *Cmp = dyn_cast<VPInstruction>(*WideIV->user_begin());
     Cmp->setOperand(1, NewBound);
 
     MadeChange = true;
   }
 
-  if (MadeChange) {
-    Plan.setVF(BestVF);
-    Plan.setUF(BestUF);
-  }
+  return MadeChange;
 }
 
-void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
-                                         unsigned BestUF,
-                                         PredicatedScalarEvolution &PSE) {
-  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
-  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
+                                              unsigned BestUF,
+                                              PredicatedScalarEvolution &PSE) {
   VPBasicBlock *ExitingVPBB =
       Plan.getVectorLoopRegion()->getExitingBasicBlock();
   auto *Term = &ExitingVPBB->back();
@@ -736,7 +732,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   if (!match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
       !match(Term,
              m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
-    return;
+    return false;
 
   ScalarEvolution &SE = *PSE.getSE();
   const SCEV *TripCount =
@@ -747,7 +743,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
   if (TripCount->isZero() ||
       !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
-    return;
+    return false;
 
   LLVMContext &Ctx = SE.getContext();
   auto *BOC = new VPInstruction(
@@ -759,8 +755,24 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   for (VPValue *Op : PossiblyDead)
     recursivelyDeleteDeadRecipes(Op);
   ExitingVPBB->appendRecipe(BOC);
-  Plan.setVF(BestVF);
-  Plan.setUF(BestUF);
+
+  return true;
+}
+
+void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
+                                         unsigned BestUF,
+                                         PredicatedScalarEvolution &PSE) {
+  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
+  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+
+  bool MadeChange =
+      simplifyBranchConditionForVFAndUF(Plan, BestVF, BestUF, PSE);
+  MadeChange |= optimizeVectorInductionWidthForTCAndVFUF(Plan, BestVF, BestUF);
+
+  if (MadeChange) {
+    Plan.setVF(BestVF);
+    Plan.setUF(BestUF);
+  }
   // TODO: Further simplifications are possible
   //      1. Replace inductions with constants.
   //      2. Replace vector loop region with VPBasicBlock.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 948665b89adc00..1491e0a8df04d5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -15,7 +15,6 @@
 
 #include "VPlan.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
-#include "llvm/Support/TypeSize.h"
 
 namespace llvm {
 
@@ -59,10 +58,6 @@ struct VPlanTransforms {
                                  unsigned BestUF,
                                  PredicatedScalarEvolution &PSE);
 
-  /// Optimize \p Plan based on \p TC, \p BestVF and \p BestUF.
-  static void optimizeForTCAndVFAndUF(VPlan &Plan, unsigned TC,
-                                      ElementCount BestVF, unsigned BestUF);
-
   /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
   /// optimizations, dead recipe removal, replicate region optimizations and
   /// block merging.
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width-unrolling.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width-unrolling.ll
new file mode 100644
index 00000000000000..20b7f91ec41aaa
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width-unrolling.ll
@@ -0,0 +1,4162 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -force-vector-width=64 -force-vector-interleave=8 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
+
+define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_small_tc_i8(
+; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE1022:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <64 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31, i64 32, i64 33, i64 34, i64 35, i64 36, i64 37, i64 38, i64 39, i64 40, i64 41, i64 42, i64 43, i64 44, i64 45, i64 46, i64 47, i64 48, i64 49, i64 50, i64 51, i64 52, i64 53, i64 54, i64 55, i64 56, i64 57, i64 58, i64 59, i64 60, i64 61, i64 62, i64 63>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE1022]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <64 x i64> [[VEC_IND]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <64 x i64> [[STEP_ADD]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <64 x i64> [[STEP_ADD_2]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_4:%.*]] = add <64 x i64> [[STEP_ADD_3]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_5:%.*]] = add <64 x i64> [[STEP_ADD_4]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_6:%.*]] = add <64 x i64> [[STEP_ADD_5]], splat (i64 64)
+; CHECK-NEXT:    [[STEP_ADD_7:%.*]] = add <64 x i64> [[STEP_ADD_6]], splat (i64 64)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <64 x i64> [[VEC_IND]], splat (i64 14)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <64 x i64> [[STEP_ADD]], splat (i64 14)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <64 x i64> [[STEP_ADD_2]], splat (i64 14)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <64 x i64> [[STEP_ADD_3]], splat (i64 14)
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <64 x i64> [[STEP_ADD_4]], splat (i64 14)
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule <64 x i64> [[STEP_ADD_5]], splat (i64 14)
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule <64 x i64> [[STEP_ADD_6]], splat (i64 14)
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ule <64 x i64> [[STEP_ADD_7]], splat (i64 14)
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <64 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP9]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP10]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <64 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP12]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP13]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <64 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP15]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP16]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <64 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP17]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP18]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP19]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <64 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK:       [[PRED_STORE_IF7]]:
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP21]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP22]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; CHECK:       [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <64 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK:       [[PRED_STORE_IF9]]:
+; CHECK-NEXT:    [[TMP24:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP24]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP25]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; CHECK:       [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <64 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    br i1 [[TMP26]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK:       [[PRED_STORE_IF11]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP27]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP28]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; CHECK:       [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <64 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    br i1 [[TMP29]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; CHECK:       [[PRED_STORE_IF13]]:
+; CHECK-NEXT:    [[TMP30:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP30]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP31]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; CHECK:       [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <64 x i1> [[TMP0]], i32 8
+; CHECK-NEXT:    br i1 [[TMP32]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; CHECK:       [[PRED_STORE_IF15]]:
+; CHECK-NEXT:    [[TMP33:%.*]] = add i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP33]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP34]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
+; CHECK:       [[PRED_STORE_CONTINUE16]]:
+; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <64 x i1> [[TMP0]], i32 9
+; CHECK-NEXT:    br i1 [[TMP35]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; CHECK:       [[PRED_STORE_IF17]]:
+; CHECK-NEXT:    [[TMP36:%.*]] = add i64 [[INDEX]], 9
+; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP36]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP37]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
+; CHECK:       [[PRED_STORE_CONTINUE18]]:
+; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <64 x i1> [[TMP0]], i32 10
+; CHECK-NEXT:    br i1 [[TMP38]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK:       [[PRED_STORE_IF19]]:
+; CHECK-NEXT:    [[TMP39:%.*]] = add i64 [[INDEX]], 10
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP39]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP40]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
+; CHECK:       [[PRED_STORE_CONTINUE20]]:
+; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <64 x i1> [[TMP0]], i32 11
+; CHECK-NEXT:    br i1 [[TMP41]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; CHECK:       [[PRED_STORE_IF21]]:
+; CHECK-NEXT:    [[TMP42:%.*]] = add i64 [[INDEX]], 11
+; CHECK-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP42]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP43]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
+; CHECK:       [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <64 x i1> [[TMP0]], i32 12
+; CHECK-NEXT:    br i1 [[TMP44]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; CHECK:       [[PRED_STORE_IF23]]:
+; CHECK-NEXT:    [[TMP45:%.*]] = add i64 [[INDEX]], 12
+; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP45]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP46]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
+; CHECK:       [[PRED_STORE_CONTINUE24]]:
+; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <64 x i1> [[TMP0]], i32 13
+; CHECK-NEXT:    br i1 [[TMP47]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; CHECK:       [[PRED_STORE_IF25]]:
+; CHECK-NEXT:    [[TMP48:%.*]] = add i64 [[INDEX]], 13
+; CHECK-NEXT:    [[TMP49:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP48]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP49]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
+; CHECK:       [[PRED_STORE_CONTINUE26]]:
+; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <64 x i1> [[TMP0]], i32 14
+; CHECK-NEXT:    br i1 [[TMP50]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; CHECK:       [[PRED_STORE_IF27]]:
+; CHECK-NEXT:    [[TMP51:%.*]] = add i64 [[INDEX]], 14
+; CHECK-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP51]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP52]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
+; CHECK:       [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <64 x i1> [[TMP0]], i32 15
+; CHECK-NEXT:    br i1 [[TMP53]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; CHECK:       [[PRED_STORE_IF29]]:
+; CHECK-NEXT:    [[TMP54:%.*]] = add i64 [[INDEX]], 15
+; CHECK-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP54]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP55]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
+; CHECK:       [[PRED_STORE_CONTINUE30]]:
+; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <64 x i1> [[TMP0]], i32 16
+; CHECK-NEXT:    br i1 [[TMP56]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]]
+; CHECK:       [[PRED_STORE_IF31]]:
+; CHECK-NEXT:    [[TMP57:%.*]] = add i64 [[INDEX]], 16
+; CHECK-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP57]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP58]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE32]]
+; CHECK:       [[PRED_STORE_CONTINUE32]]:
+; CHECK-NEXT:    [[TMP59:%.*]] = extractelement <64 x i1> [[TMP0]], i32 17
+; CHECK-NEXT:    br i1 [[TMP59]], label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]]
+; CHECK:       [[PRED_STORE_IF33]]:
+; CHECK-NEXT:    [[TMP60:%.*]] = add i64 [[INDEX]], 17
+; CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP60]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP61]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE34]]
+; CHECK:       [[PRED_STORE_CONTINUE34]]:
+; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <64 x i1> [[TMP0]], i32 18
+; CHECK-NEXT:    br i1 [[TMP62]], label %[[PRED_STORE_IF35:.*]], label %[[PRED_STORE_CONTINUE36:.*]]
+; CHECK:       [[PRED_STORE_IF35]]:
+; CHECK-NEXT:    [[TMP63:%.*]] = add i64 [[INDEX]], 18
+; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP63]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP64]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE36]]
+; CHECK:       [[PRED_STORE_CONTINUE36]]:
+; CHECK-NEXT:    [[TMP65:%.*]] = extractelement <64 x i1> [[TMP0]], i32 19
+; CHECK-NEXT:    br i1 [[TMP65]], label %[[PRED_STORE_IF37:.*]], label %[[PRED_STORE_CONTINUE38:.*]]
+; CHECK:       [[PRED_STORE_IF37]]:
+; CHECK-NEXT:    [[TMP66:%.*]] = add i64 [[INDEX]], 19
+; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP66]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP67]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE38]]
+; CHECK:       [[PRED_STORE_CONTINUE38]]:
+; CHECK-NEXT:    [[TMP68:%.*]] = extractelement <64 x i1> [[TMP0]], i32 20
+; CHECK-NEXT:    br i1 [[TMP68]], label %[[PRED_STORE_IF39:.*]], label %[[PRED_STORE_CONTINUE40:.*]]
+; CHECK:       [[PRED_STORE_IF39]]:
+; CHECK-NEXT:    [[TMP69:%.*]] = add i64 [[INDEX]], 20
+; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP69]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP70]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE40]]
+; CHECK:       [[PRED_STORE_CONTINUE40]]:
+; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <64 x i1> [[TMP0]], i32 21
+; CHECK-NEXT:    br i1 [[TMP71]], label %[[PRED_STORE_IF41:.*]], label %[[PRED_STORE_CONTINUE42:.*]]
+; CHECK:       [[PRED_STORE_IF41]]:
+; CHECK-NEXT:    [[TMP72:%.*]] = add i64 [[INDEX]], 21
+; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP72]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP73]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE42]]
+; CHECK:       [[PRED_STORE_CONTINUE42]]:
+; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <64 x i1> [[TMP0]], i32 22
+; CHECK-NEXT:    br i1 [[TMP74]], label %[[PRED_STORE_IF43:.*]], label %[[PRED_STORE_CONTINUE44:.*]]
+; CHECK:       [[PRED_STORE_IF43]]:
+; CHECK-NEXT:    [[TMP75:%.*]] = add i64 [[INDEX]], 22
+; CHECK-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP75]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP76]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE44]]
+; CHECK:       [[PRED_STORE_CONTINUE44]]:
+; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <64 x i1> [[TMP0]], i32 23
+; CHECK-NEXT:    br i1 [[TMP77]], label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]]
+; CHECK:       [[PRED_STORE_IF45]]:
+; CHECK-NEXT:    [[TMP78:%.*]] = add i64 [[INDEX]], 23
+; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP78]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP79]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE46]]
+; CHECK:       [[PRED_STORE_CONTINUE46]]:
+; CHECK-NEXT:    [[TMP80:%.*]] = extractelement <64 x i1> [[TMP0]], i32 24
+; CHECK-NEXT:    br i1 [[TMP80]], label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]]
+; CHECK:       [[PRED_STORE_IF47]]:
+; CHECK-NEXT:    [[TMP81:%.*]] = add i64 [[INDEX]], 24
+; CHECK-NEXT:    [[TMP82:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP81]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP82]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE48]]
+; CHECK:       [[PRED_STORE_CONTINUE48]]:
+; CHECK-NEXT:    [[TMP83:%.*]] = extractelement <64 x i1> [[TMP0]], i32 25
+; CHECK-NEXT:    br i1 [[TMP83]], label %[[PRED_STORE_IF49:.*]], label %[[PRED_STORE_CONTINUE50:.*]]
+; CHECK:       [[PRED_STORE_IF49]]:
+; CHECK-NEXT:    [[TMP84:%.*]] = add i64 [[INDEX]], 25
+; CHECK-NEXT:    [[TMP85:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP84]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP85]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE50]]
+; CHECK:       [[PRED_STORE_CONTINUE50]]:
+; CHECK-NEXT:    [[TMP86:%.*]] = extractelement <64 x i1> [[TMP0]], i32 26
+; CHECK-NEXT:    br i1 [[TMP86]], label %[[PRED_STORE_IF51:.*]], label %[[PRED_STORE_CONTINUE52:.*]]
+; CHECK:       [[PRED_STORE_IF51]]:
+; CHECK-NEXT:    [[TMP87:%.*]] = add i64 [[INDEX]], 26
+; CHECK-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP87]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP88]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE52]]
+; CHECK:       [[PRED_STORE_CONTINUE52]]:
+; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <64 x i1> [[TMP0]], i32 27
+; CHECK-NEXT:    br i1 [[TMP89]], label %[[PRED_STORE_IF53:.*]], label %[[PRED_STORE_CONTINUE54:.*]]
+; CHECK:       [[PRED_STORE_IF53]]:
+; CHECK-NEXT:    [[TMP90:%.*]] = add i64 [[INDEX]], 27
+; CHECK-NEXT:    [[TMP91:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP90]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP91]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE54]]
+; CHECK:       [[PRED_STORE_CONTINUE54]]:
+; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <64 x i1> [[TMP0]], i32 28
+; CHECK-NEXT:    br i1 [[TMP92]], label %[[PRED_STORE_IF55:.*]], label %[[PRED_STORE_CONTINUE56:.*]]
+; CHECK:       [[PRED_STORE_IF55]]:
+; CHECK-NEXT:    [[TMP93:%.*]] = add i64 [[INDEX]], 28
+; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP93]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP94]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE56]]
+; CHECK:       [[PRED_STORE_CONTINUE56]]:
+; CHECK-NEXT:    [[TMP95:%.*]] = extractelement <64 x i1> [[TMP0]], i32 29
+; CHECK-NEXT:    br i1 [[TMP95]], label %[[PRED_STORE_IF57:.*]], label %[[PRED_STORE_CONTINUE58:.*]]
+; CHECK:       [[PRED_STORE_IF57]]:
+; CHECK-NEXT:    [[TMP96:%.*]] = add i64 [[INDEX]], 29
+; CHECK-NEXT:    [[TMP97:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP96]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP97]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE58]]
+; CHECK:       [[PRED_STORE_CONTINUE58]]:
+; CHECK-NEXT:    [[TMP98:%.*]] = extractelement <64 x i1> [[TMP0]], i32 30
+; CHECK-NEXT:    br i1 [[TMP98]], label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]]
+; CHECK:       [[PRED_STORE_IF59]]:
+; CHECK-NEXT:    [[TMP99:%.*]] = add i64 [[INDEX]], 30
+; CHECK-NEXT:    [[TMP100:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP99]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP100]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE60]]
+; CHECK:       [[PRED_STORE_CONTINUE60]]:
+; CHECK-NEXT:    [[TMP101:%.*]] = extractelement <64 x i1> [[TMP0]], i32 31
+; CHECK-NEXT:    br i1 [[TMP101]], label %[[PRED_STORE_IF61:.*]], label %[[PRED_STORE_CONTINUE62:.*]]
+; CHECK:       [[PRED_STORE_IF61]]:
+; CHECK-NEXT:    [[TMP102:%.*]] = add i64 [[INDEX]], 31
+; CHECK-NEXT:    [[TMP103:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP102]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP103]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE62]]
+; CHECK:       [[PRED_STORE_CONTINUE62]]:
+; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <64 x i1> [[TMP0]], i32 32
+; CHECK-NEXT:    br i1 [[TMP104]], label %[[PRED_STORE_IF63:.*]], label %[[PRED_STORE_CONTINUE64:.*]]
+; CHECK:       [[PRED_STORE_IF63]]:
+; CHECK-NEXT:    [[TMP105:%.*]] = add i64 [[INDEX]], 32
+; CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP105]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP106]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE64]]
+; CHECK:       [[PRED_STORE_CONTINUE64]]:
+; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <64 x i1> [[TMP0]], i32 33
+; CHECK-NEXT:    br i1 [[TMP107]], label %[[PRED_STORE_IF65:.*]], label %[[PRED_STORE_CONTINUE66:.*]]
+; CHECK:       [[PRED_STORE_IF65]]:
+; CHECK-NEXT:    [[TMP108:%.*]] = add i64 [[INDEX]], 33
+; CHECK-NEXT:    [[TMP109:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP108]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP109]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE66]]
+; CHECK:       [[PRED_STORE_CONTINUE66]]:
+; CHECK-NEXT:    [[TMP110:%.*]] = extractelement <64 x i1> [[TMP0]], i32 34
+; CHECK-NEXT:    br i1 [[TMP110]], label %[[PRED_STORE_IF67:.*]], label %[[PRED_STORE_CONTINUE68:.*]]
+; CHECK:       [[PRED_STORE_IF67]]:
+; CHECK-NEXT:    [[TMP111:%.*]] = add i64 [[INDEX]], 34
+; CHECK-NEXT:    [[TMP112:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP111]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP112]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE68]]
+; CHECK:       [[PRED_STORE_CONTINUE68]]:
+; CHECK-NEXT:    [[TMP113:%.*]] = extractelement <64 x i1> [[TMP0]], i32 35
+; CHECK-NEXT:    br i1 [[TMP113]], label %[[PRED_STORE_IF69:.*]], label %[[PRED_STORE_CONTINUE70:.*]]
+; CHECK:       [[PRED_STORE_IF69]]:
+; CHECK-NEXT:    [[TMP114:%.*]] = add i64 [[INDEX]], 35
+; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP114]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP115]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE70]]
+; CHECK:       [[PRED_STORE_CONTINUE70]]:
+; CHECK-NEXT:    [[TMP116:%.*]] = extractelement <64 x i1> [[TMP0]], i32 36
+; CHECK-NEXT:    br i1 [[TMP116]], label %[[PRED_STORE_IF71:.*]], label %[[PRED_STORE_CONTINUE72:.*]]
+; CHECK:       [[PRED_STORE_IF71]]:
+; CHECK-NEXT:    [[TMP117:%.*]] = add i64 [[INDEX]], 36
+; CHECK-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP117]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP118]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE72]]
+; CHECK:       [[PRED_STORE_CONTINUE72]]:
+; CHECK-NEXT:    [[TMP119:%.*]] = extractelement <64 x i1> [[TMP0]], i32 37
+; CHECK-NEXT:    br i1 [[TMP119]], label %[[PRED_STORE_IF73:.*]], label %[[PRED_STORE_CONTINUE74:.*]]
+; CHECK:       [[PRED_STORE_IF73]]:
+; CHECK-NEXT:    [[TMP120:%.*]] = add i64 [[INDEX]], 37
+; CHECK-NEXT:    [[TMP121:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP120]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP121]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE74]]
+; CHECK:       [[PRED_STORE_CONTINUE74]]:
+; CHECK-NEXT:    [[TMP122:%.*]] = extractelement <64 x i1> [[TMP0]], i32 38
+; CHECK-NEXT:    br i1 [[TMP122]], label %[[PRED_STORE_IF75:.*]], label %[[PRED_STORE_CONTINUE76:.*]]
+; CHECK:       [[PRED_STORE_IF75]]:
+; CHECK-NEXT:    [[TMP123:%.*]] = add i64 [[INDEX]], 38
+; CHECK-NEXT:    [[TMP124:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP123]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP124]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE76]]
+; CHECK:       [[PRED_STORE_CONTINUE76]]:
+; CHECK-NEXT:    [[TMP125:%.*]] = extractelement <64 x i1> [[TMP0]], i32 39
+; CHECK-NEXT:    br i1 [[TMP125]], label %[[PRED_STORE_IF77:.*]], label %[[PRED_STORE_CONTINUE78:.*]]
+; CHECK:       [[PRED_STORE_IF77]]:
+; CHECK-NEXT:    [[TMP126:%.*]] = add i64 [[INDEX]], 39
+; CHECK-NEXT:    [[TMP127:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP126]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP127]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE78]]
+; CHECK:       [[PRED_STORE_CONTINUE78]]:
+; CHECK-NEXT:    [[TMP128:%.*]] = extractelement <64 x i1> [[TMP0]], i32 40
+; CHECK-NEXT:    br i1 [[TMP128]], label %[[PRED_STORE_IF79:.*]], label %[[PRED_STORE_CONTINUE80:.*]]
+; CHECK:       [[PRED_STORE_IF79]]:
+; CHECK-NEXT:    [[TMP129:%.*]] = add i64 [[INDEX]], 40
+; CHECK-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP129]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP130]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE80]]
+; CHECK:       [[PRED_STORE_CONTINUE80]]:
+; CHECK-NEXT:    [[TMP131:%.*]] = extractelement <64 x i1> [[TMP0]], i32 41
+; CHECK-NEXT:    br i1 [[TMP131]], label %[[PRED_STORE_IF81:.*]], label %[[PRED_STORE_CONTINUE82:.*]]
+; CHECK:       [[PRED_STORE_IF81]]:
+; CHECK-NEXT:    [[TMP132:%.*]] = add i64 [[INDEX]], 41
+; CHECK-NEXT:    [[TMP133:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP132]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP133]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE82]]
+; CHECK:       [[PRED_STORE_CONTINUE82]]:
+; CHECK-NEXT:    [[TMP134:%.*]] = extractelement <64 x i1> [[TMP0]], i32 42
+; CHECK-NEXT:    br i1 [[TMP134]], label %[[PRED_STORE_IF83:.*]], label %[[PRED_STORE_CONTINUE84:.*]]
+; CHECK:       [[PRED_STORE_IF83]]:
+; CHECK-NEXT:    [[TMP135:%.*]] = add i64 [[INDEX]], 42
+; CHECK-NEXT:    [[TMP136:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP135]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP136]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE84]]
+; CHECK:       [[PRED_STORE_CONTINUE84]]:
+; CHECK-NEXT:    [[TMP137:%.*]] = extractelement <64 x i1> [[TMP0]], i32 43
+; CHECK-NEXT:    br i1 [[TMP137]], label %[[PRED_STORE_IF85:.*]], label %[[PRED_STORE_CONTINUE86:.*]]
+; CHECK:       [[PRED_STORE_IF85]]:
+; CHECK-NEXT:    [[TMP138:%.*]] = add i64 [[INDEX]], 43
+; CHECK-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP138]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP139]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE86]]
+; CHECK:       [[PRED_STORE_CONTINUE86]]:
+; CHECK-NEXT:    [[TMP140:%.*]] = extractelement <64 x i1> [[TMP0]], i32 44
+; CHECK-NEXT:    br i1 [[TMP140]], label %[[PRED_STORE_IF87:.*]], label %[[PRED_STORE_CONTINUE88:.*]]
+; CHECK:       [[PRED_STORE_IF87]]:
+; CHECK-NEXT:    [[TMP141:%.*]] = add i64 [[INDEX]], 44
+; CHECK-NEXT:    [[TMP142:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP141]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP142]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE88]]
+; CHECK:       [[PRED_STORE_CONTINUE88]]:
+; CHECK-NEXT:    [[TMP143:%.*]] = extractelement <64 x i1> [[TMP0]], i32 45
+; CHECK-NEXT:    br i1 [[TMP143]], label %[[PRED_STORE_IF89:.*]], label %[[PRED_STORE_CONTINUE90:.*]]
+; CHECK:       [[PRED_STORE_IF89]]:
+; CHECK-NEXT:    [[TMP144:%.*]] = add i64 [[INDEX]], 45
+; CHECK-NEXT:    [[TMP145:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP144]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP145]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE90]]
+; CHECK:       [[PRED_STORE_CONTINUE90]]:
+; CHECK-NEXT:    [[TMP146:%.*]] = extractelement <64 x i1> [[TMP0]], i32 46
+; CHECK-NEXT:    br i1 [[TMP146]], label %[[PRED_STORE_IF91:.*]], label %[[PRED_STORE_CONTINUE92:.*]]
+; CHECK:       [[PRED_STORE_IF91]]:
+; CHECK-NEXT:    [[TMP147:%.*]] = add i64 [[INDEX]], 46
+; CHECK-NEXT:    [[TMP148:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP147]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP148]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE92]]
+; CHECK:       [[PRED_STORE_CONTINUE92]]:
+; CHECK-NEXT:    [[TMP149:%.*]] = extractelement <64 x i1> [[TMP0]], i32 47
+; CHECK-NEXT:    br i1 [[TMP149]], label %[[PRED_STORE_IF93:.*]], label %[[PRED_STORE_CONTINUE94:.*]]
+; CHECK:       [[PRED_STORE_IF93]]:
+; CHECK-NEXT:    [[TMP150:%.*]] = add i64 [[INDEX]], 47
+; CHECK-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP150]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP151]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE94]]
+; CHECK:       [[PRED_STORE_CONTINUE94]]:
+; CHECK-NEXT:    [[TMP152:%.*]] = extractelement <64 x i1> [[TMP0]], i32 48
+; CHECK-NEXT:    br i1 [[TMP152]], label %[[PRED_STORE_IF95:.*]], label %[[PRED_STORE_CONTINUE96:.*]]
+; CHECK:       [[PRED_STORE_IF95]]:
+; CHECK-NEXT:    [[TMP153:%.*]] = add i64 [[INDEX]], 48
+; CHECK-NEXT:    [[TMP154:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP153]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP154]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE96]]
+; CHECK:       [[PRED_STORE_CONTINUE96]]:
+; CHECK-NEXT:    [[TMP155:%.*]] = extractelement <64 x i1> [[TMP0]], i32 49
+; CHECK-NEXT:    br i1 [[TMP155]], label %[[PRED_STORE_IF97:.*]], label %[[PRED_STORE_CONTINUE98:.*]]
+; CHECK:       [[PRED_STORE_IF97]]:
+; CHECK-NEXT:    [[TMP156:%.*]] = add i64 [[INDEX]], 49
+; CHECK-NEXT:    [[TMP157:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP156]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP157]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE98]]
+; CHECK:       [[PRED_STORE_CONTINUE98]]:
+; CHECK-NEXT:    [[TMP158:%.*]] = extractelement <64 x i1> [[TMP0]], i32 50
+; CHECK-NEXT:    br i1 [[TMP158]], label %[[PRED_STORE_IF99:.*]], label %[[PRED_STORE_CONTINUE100:.*]]
+; CHECK:       [[PRED_STORE_IF99]]:
+; CHECK-NEXT:    [[TMP159:%.*]] = add i64 [[INDEX]], 50
+; CHECK-NEXT:    [[TMP160:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP159]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP160]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE100]]
+; CHECK:       [[PRED_STORE_CONTINUE100]]:
+; CHECK-NEXT:    [[TMP161:%.*]] = extractelement <64 x i1> [[TMP0]], i32 51
+; CHECK-NEXT:    br i1 [[TMP161]], label %[[PRED_STORE_IF101:.*]], label %[[PRED_STORE_CONTINUE102:.*]]
+; CHECK:       [[PRED_STORE_IF101]]:
+; CHECK-NEXT:    [[TMP162:%.*]] = add i64 [[INDEX]], 51
+; CHECK-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP162]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP163]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE102]]
+; CHECK:       [[PRED_STORE_CONTINUE102]]:
+; CHECK-NEXT:    [[TMP164:%.*]] = extractelement <64 x i1> [[TMP0]], i32 52
+; CHECK-NEXT:    br i1 [[TMP164]], label %[[PRED_STORE_IF103:.*]], label %[[PRED_STORE_CONTINUE104:.*]]
+; CHECK:       [[PRED_STORE_IF103]]:
+; CHECK-NEXT:    [[TMP165:%.*]] = add i64 [[INDEX]], 52
+; CHECK-NEXT:    [[TMP166:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP165]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP166]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE104]]
+; CHECK:       [[PRED_STORE_CONTINUE104]]:
+; CHECK-NEXT:    [[TMP167:%.*]] = extractelement <64 x i1> [[TMP0]], i32 53
+; CHECK-NEXT:    br i1 [[TMP167]], label %[[PRED_STORE_IF105:.*]], label %[[PRED_STORE_CONTINUE106:.*]]
+; CHECK:       [[PRED_STORE_IF105]]:
+; CHECK-NEXT:    [[TMP168:%.*]] = add i64 [[INDEX]], 53
+; CHECK-NEXT:    [[TMP169:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP168]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP169]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE106]]
+; CHECK:       [[PRED_STORE_CONTINUE106]]:
+; CHECK-NEXT:    [[TMP170:%.*]] = extractelement <64 x i1> [[TMP0]], i32 54
+; CHECK-NEXT:    br i1 [[TMP170]], label %[[PRED_STORE_IF107:.*]], label %[[PRED_STORE_CONTINUE108:.*]]
+; CHECK:       [[PRED_STORE_IF107]]:
+; CHECK-NEXT:    [[TMP171:%.*]] = add i64 [[INDEX]], 54
+; CHECK-NEXT:    [[TMP172:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP171]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP172]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE108]]
+; CHECK:       [[PRED_STORE_CONTINUE108]]:
+; CHECK-NEXT:    [[TMP173:%.*]] = extractelement <64 x i1> [[TMP0]], i32 55
+; CHECK-NEXT:    br i1 [[TMP173]], label %[[PRED_STORE_IF109:.*]], label %[[PRED_STORE_CONTINUE110:.*]]
+; CHECK:       [[PRED_STORE_IF109]]:
+; CHECK-NEXT:    [[TMP174:%.*]] = add i64 [[INDEX]], 55
+; CHECK-NEXT:    [[TMP175:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP174]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP175]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE110]]
+; CHECK:       [[PRED_STORE_CONTINUE110]]:
+; CHECK-NEXT:    [[TMP176:%.*]] = extractelement <64 x i1> [[TMP0]], i32 56
+; CHECK-NEXT:    br i1 [[TMP176]], label %[[PRED_STORE_IF111:.*]], label %[[PRED_STORE_CONTINUE112:.*]]
+; CHECK:       [[PRED_STORE_IF111]]:
+; CHECK-NEXT:    [[TMP177:%.*]] = add i64 [[INDEX]], 56
+; CHECK-NEXT:    [[TMP178:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP177]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP178]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE112]]
+; CHECK:       [[PRED_STORE_CONTINUE112]]:
+; CHECK-NEXT:    [[TMP179:%.*]] = extractelement <64 x i1> [[TMP0]], i32 57
+; CHECK-NEXT:    br i1 [[TMP179]], label %[[PRED_STORE_IF113:.*]], label %[[PRED_STORE_CONTINUE114:.*]]
+; CHECK:       [[PRED_STORE_IF113]]:
+; CHECK-NEXT:    [[TMP180:%.*]] = add i64 [[INDEX]], 57
+; CHECK-NEXT:    [[TMP181:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP180]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP181]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE114]]
+; CHECK:       [[PRED_STORE_CONTINUE114]]:
+; CHECK-NEXT:    [[TMP182:%.*]] = extractelement <64 x i1> [[TMP0]], i32 58
+; CHECK-NEXT:    br i1 [[TMP182]], label %[[PRED_STORE_IF115:.*]], label %[[PRED_STORE_CONTINUE116:.*]]
+; CHECK:       [[PRED_STORE_IF115]]:
+; CHECK-NEXT:    [[TMP183:%.*]] = add i64 [[INDEX]], 58
+; CHECK-NEXT:    [[TMP184:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP183]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP184]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE116]]
+; CHECK:       [[PRED_STORE_CONTINUE116]]:
+; CHECK-NEXT:    [[TMP185:%.*]] = extractelement <64 x i1> [[TMP0]], i32 59
+; CHECK-NEXT:    br i1 [[TMP185]], label %[[PRED_STORE_IF117:.*]], label %[[PRED_STORE_CONTINUE118:.*]]
+; CHECK:       [[PRED_STORE_IF117]]:
+; CHECK-NEXT:    [[TMP186:%.*]] = add i64 [[INDEX]], 59
+; CHECK-NEXT:    [[TMP187:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP186]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP187]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE118]]
+; CHECK:       [[PRED_STORE_CONTINUE118]]:
+; CHECK-NEXT:    [[TMP188:%.*]] = extractelement <64 x i1> [[TMP0]], i32 60
+; CHECK-NEXT:    br i1 [[TMP188]], label %[[PRED_STORE_IF119:.*]], label %[[PRED_STORE_CONTINUE120:.*]]
+; CHECK:       [[PRED_STORE_IF119]]:
+; CHECK-NEXT:    [[TMP189:%.*]] = add i64 [[INDEX]], 60
+; CHECK-NEXT:    [[TMP190:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP189]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP190]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE120]]
+; CHECK:       [[PRED_STORE_CONTINUE120]]:
+; CHECK-NEXT:    [[TMP191:%.*]] = extractelement <64 x i1> [[TMP0]], i32 61
+; CHECK-NEXT:    br i1 [[TMP191]], label %[[PRED_STORE_IF121:.*]], label %[[PRED_STORE_CONTINUE122:.*]]
+; CHECK:       [[PRED_STORE_IF121]]:
+; CHECK-NEXT:    [[TMP192:%.*]] = add i64 [[INDEX]], 61
+; CHECK-NEXT:    [[TMP193:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP192]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP193]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE122]]
+; CHECK:       [[PRED_STORE_CONTINUE122]]:
+; CHECK-NEXT:    [[TMP194:%.*]] = extractelement <64 x i1> [[TMP0]], i32 62
+; CHECK-NEXT:    br i1 [[TMP194]], label %[[PRED_STORE_IF123:.*]], label %[[PRED_STORE_CONTINUE124:.*]]
+; CHECK:       [[PRED_STORE_IF123]]:
+; CHECK-NEXT:    [[TMP195:%.*]] = add i64 [[INDEX]], 62
+; CHECK-NEXT:    [[TMP196:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP195]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP196]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE124]]
+; CHECK:       [[PRED_STORE_CONTINUE124]]:
+; CHECK-NEXT:    [[TMP197:%.*]] = extractelement <64 x i1> [[TMP0]], i32 63
+; CHECK-NEXT:    br i1 [[TMP197]], label %[[PRED_STORE_IF125:.*]], label %[[PRED_STORE_CONTINUE126:.*]]
+; CHECK:       [[PRED_STORE_IF125]]:
+; CHECK-NEXT:    [[TMP198:%.*]] = add i64 [[INDEX]], 63
+; CHECK-NEXT:    [[TMP199:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP198]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP199]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE126]]
+; CHECK:       [[PRED_STORE_CONTINUE126]]:
+; CHECK-NEXT:    [[TMP200:%.*]] = extractelement <64 x i1> [[TMP1]], i32 0
+; CHECK-NEXT:    br i1 [[TMP200]], label %[[PRED_STORE_IF127:.*]], label %[[PRED_STORE_CONTINUE128:.*]]
+; CHECK:       [[PRED_STORE_IF127]]:
+; CHECK-NEXT:    [[TMP201:%.*]] = add i64 [[INDEX]], 64
+; CHECK-NEXT:    [[TMP202:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP201]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP202]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE128]]
+; CHECK:       [[PRED_STORE_CONTINUE128]]:
+; CHECK-NEXT:    [[TMP203:%.*]] = extractelement <64 x i1> [[TMP1]], i32 1
+; CHECK-NEXT:    br i1 [[TMP203]], label %[[PRED_STORE_IF129:.*]], label %[[PRED_STORE_CONTINUE130:.*]]
+; CHECK:       [[PRED_STORE_IF129]]:
+; CHECK-NEXT:    [[TMP204:%.*]] = add i64 [[INDEX]], 65
+; CHECK-NEXT:    [[TMP205:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP204]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP205]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE130]]
+; CHECK:       [[PRED_STORE_CONTINUE130]]:
+; CHECK-NEXT:    [[TMP206:%.*]] = extractelement <64 x i1> [[TMP1]], i32 2
+; CHECK-NEXT:    br i1 [[TMP206]], label %[[PRED_STORE_IF131:.*]], label %[[PRED_STORE_CONTINUE132:.*]]
+; CHECK:       [[PRED_STORE_IF131]]:
+; CHECK-NEXT:    [[TMP207:%.*]] = add i64 [[INDEX]], 66
+; CHECK-NEXT:    [[TMP208:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP207]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP208]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE132]]
+; CHECK:       [[PRED_STORE_CONTINUE132]]:
+; CHECK-NEXT:    [[TMP209:%.*]] = extractelement <64 x i1> [[TMP1]], i32 3
+; CHECK-NEXT:    br i1 [[TMP209]], label %[[PRED_STORE_IF133:.*]], label %[[PRED_STORE_CONTINUE134:.*]]
+; CHECK:       [[PRED_STORE_IF133]]:
+; CHECK-NEXT:    [[TMP210:%.*]] = add i64 [[INDEX]], 67
+; CHECK-NEXT:    [[TMP211:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP210]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP211]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE134]]
+; CHECK:       [[PRED_STORE_CONTINUE134]]:
+; CHECK-NEXT:    [[TMP212:%.*]] = extractelement <64 x i1> [[TMP1]], i32 4
+; CHECK-NEXT:    br i1 [[TMP212]], label %[[PRED_STORE_IF135:.*]], label %[[PRED_STORE_CONTINUE136:.*]]
+; CHECK:       [[PRED_STORE_IF135]]:
+; CHECK-NEXT:    [[TMP213:%.*]] = add i64 [[INDEX]], 68
+; CHECK-NEXT:    [[TMP214:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP213]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP214]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE136]]
+; CHECK:       [[PRED_STORE_CONTINUE136]]:
+; CHECK-NEXT:    [[TMP215:%.*]] = extractelement <64 x i1> [[TMP1]], i32 5
+; CHECK-NEXT:    br i1 [[TMP215]], label %[[PRED_STORE_IF137:.*]], label %[[PRED_STORE_CONTINUE138:.*]]
+; CHECK:       [[PRED_STORE_IF137]]:
+; CHECK-NEXT:    [[TMP216:%.*]] = add i64 [[INDEX]], 69
+; CHECK-NEXT:    [[TMP217:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP216]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP217]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE138]]
+; CHECK:       [[PRED_STORE_CONTINUE138]]:
+; CHECK-NEXT:    [[TMP218:%.*]] = extractelement <64 x i1> [[TMP1]], i32 6
+; CHECK-NEXT:    br i1 [[TMP218]], label %[[PRED_STORE_IF139:.*]], label %[[PRED_STORE_CONTINUE140:.*]]
+; CHECK:       [[PRED_STORE_IF139]]:
+; CHECK-NEXT:    [[TMP219:%.*]] = add i64 [[INDEX]], 70
+; CHECK-NEXT:    [[TMP220:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP219]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP220]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE140]]
+; CHECK:       [[PRED_STORE_CONTINUE140]]:
+; CHECK-NEXT:    [[TMP221:%.*]] = extractelement <64 x i1> [[TMP1]], i32 7
+; CHECK-NEXT:    br i1 [[TMP221]], label %[[PRED_STORE_IF141:.*]], label %[[PRED_STORE_CONTINUE142:.*]]
+; CHECK:       [[PRED_STORE_IF141]]:
+; CHECK-NEXT:    [[TMP222:%.*]] = add i64 [[INDEX]], 71
+; CHECK-NEXT:    [[TMP223:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP222]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP223]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE142]]
+; CHECK:       [[PRED_STORE_CONTINUE142]]:
+; CHECK-NEXT:    [[TMP224:%.*]] = extractelement <64 x i1> [[TMP1]], i32 8
+; CHECK-NEXT:    br i1 [[TMP224]], label %[[PRED_STORE_IF143:.*]], label %[[PRED_STORE_CONTINUE144:.*]]
+; CHECK:       [[PRED_STORE_IF143]]:
+; CHECK-NEXT:    [[TMP225:%.*]] = add i64 [[INDEX]], 72
+; CHECK-NEXT:    [[TMP226:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP225]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP226]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE144]]
+; CHECK:       [[PRED_STORE_CONTINUE144]]:
+; CHECK-NEXT:    [[TMP227:%.*]] = extractelement <64 x i1> [[TMP1]], i32 9
+; CHECK-NEXT:    br i1 [[TMP227]], label %[[PRED_STORE_IF145:.*]], label %[[PRED_STORE_CONTINUE146:.*]]
+; CHECK:       [[PRED_STORE_IF145]]:
+; CHECK-NEXT:    [[TMP228:%.*]] = add i64 [[INDEX]], 73
+; CHECK-NEXT:    [[TMP229:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP228]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP229]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE146]]
+; CHECK:       [[PRED_STORE_CONTINUE146]]:
+; CHECK-NEXT:    [[TMP230:%.*]] = extractelement <64 x i1> [[TMP1]], i32 10
+; CHECK-NEXT:    br i1 [[TMP230]], label %[[PRED_STORE_IF147:.*]], label %[[PRED_STORE_CONTINUE148:.*]]
+; CHECK:       [[PRED_STORE_IF147]]:
+; CHECK-NEXT:    [[TMP231:%.*]] = add i64 [[INDEX]], 74
+; CHECK-NEXT:    [[TMP232:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP231]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP232]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE148]]
+; CHECK:       [[PRED_STORE_CONTINUE148]]:
+; CHECK-NEXT:    [[TMP233:%.*]] = extractelement <64 x i1> [[TMP1]], i32 11
+; CHECK-NEXT:    br i1 [[TMP233]], label %[[PRED_STORE_IF149:.*]], label %[[PRED_STORE_CONTINUE150:.*]]
+; CHECK:       [[PRED_STORE_IF149]]:
+; CHECK-NEXT:    [[TMP234:%.*]] = add i64 [[INDEX]], 75
+; CHECK-NEXT:    [[TMP235:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP234]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP235]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE150]]
+; CHECK:       [[PRED_STORE_CONTINUE150]]:
+; CHECK-NEXT:    [[TMP236:%.*]] = extractelement <64 x i1> [[TMP1]], i32 12
+; CHECK-NEXT:    br i1 [[TMP236]], label %[[PRED_STORE_IF151:.*]], label %[[PRED_STORE_CONTINUE152:.*]]
+; CHECK:       [[PRED_STORE_IF151]]:
+; CHECK-NEXT:    [[TMP237:%.*]] = add i64 [[INDEX]], 76
+; CHECK-NEXT:    [[TMP238:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP237]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP238]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE152]]
+; CHECK:       [[PRED_STORE_CONTINUE152]]:
+; CHECK-NEXT:    [[TMP239:%.*]] = extractelement <64 x i1> [[TMP1]], i32 13
+; CHECK-NEXT:    br i1 [[TMP239]], label %[[PRED_STORE_IF153:.*]], label %[[PRED_STORE_CONTINUE154:.*]]
+; CHECK:       [[PRED_STORE_IF153]]:
+; CHECK-NEXT:    [[TMP240:%.*]] = add i64 [[INDEX]], 77
+; CHECK-NEXT:    [[TMP241:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP240]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP241]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE154]]
+; CHECK:       [[PRED_STORE_CONTINUE154]]:
+; CHECK-NEXT:    [[TMP242:%.*]] = extractelement <64 x i1> [[TMP1]], i32 14
+; CHECK-NEXT:    br i1 [[TMP242]], label %[[PRED_STORE_IF155:.*]], label %[[PRED_STORE_CONTINUE156:.*]]
+; CHECK:       [[PRED_STORE_IF155]]:
+; CHECK-NEXT:    [[TMP243:%.*]] = add i64 [[INDEX]], 78
+; CHECK-NEXT:    [[TMP244:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP243]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP244]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE156]]
+; CHECK:       [[PRED_STORE_CONTINUE156]]:
+; CHECK-NEXT:    [[TMP245:%.*]] = extractelement <64 x i1> [[TMP1]], i32 15
+; CHECK-NEXT:    br i1 [[TMP245]], label %[[PRED_STORE_IF157:.*]], label %[[PRED_STORE_CONTINUE158:.*]]
+; CHECK:       [[PRED_STORE_IF157]]:
+; CHECK-NEXT:    [[TMP246:%.*]] = add i64 [[INDEX]], 79
+; CHECK-NEXT:    [[TMP247:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP246]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP247]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE158]]
+; CHECK:       [[PRED_STORE_CONTINUE158]]:
+; CHECK-NEXT:    [[TMP248:%.*]] = extractelement <64 x i1> [[TMP1]], i32 16
+; CHECK-NEXT:    br i1 [[TMP248]], label %[[PRED_STORE_IF159:.*]], label %[[PRED_STORE_CONTINUE160:.*]]
+; CHECK:       [[PRED_STORE_IF159]]:
+; CHECK-NEXT:    [[TMP249:%.*]] = add i64 [[INDEX]], 80
+; CHECK-NEXT:    [[TMP250:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP249]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP250]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE160]]
+; CHECK:       [[PRED_STORE_CONTINUE160]]:
+; CHECK-NEXT:    [[TMP251:%.*]] = extractelement <64 x i1> [[TMP1]], i32 17
+; CHECK-NEXT:    br i1 [[TMP251]], label %[[PRED_STORE_IF161:.*]], label %[[PRED_STORE_CONTINUE162:.*]]
+; CHECK:       [[PRED_STORE_IF161]]:
+; CHECK-NEXT:    [[TMP252:%.*]] = add i64 [[INDEX]], 81
+; CHECK-NEXT:    [[TMP253:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP252]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP253]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE162]]
+; CHECK:       [[PRED_STORE_CONTINUE162]]:
+; CHECK-NEXT:    [[TMP254:%.*]] = extractelement <64 x i1> [[TMP1]], i32 18
+; CHECK-NEXT:    br i1 [[TMP254]], label %[[PRED_STORE_IF163:.*]], label %[[PRED_STORE_CONTINUE164:.*]]
+; CHECK:       [[PRED_STORE_IF163]]:
+; CHECK-NEXT:    [[TMP255:%.*]] = add i64 [[INDEX]], 82
+; CHECK-NEXT:    [[TMP256:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP255]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP256]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE164]]
+; CHECK:       [[PRED_STORE_CONTINUE164]]:
+; CHECK-NEXT:    [[TMP257:%.*]] = extractelement <64 x i1> [[TMP1]], i32 19
+; CHECK-NEXT:    br i1 [[TMP257]], label %[[PRED_STORE_IF165:.*]], label %[[PRED_STORE_CONTINUE166:.*]]
+; CHECK:       [[PRED_STORE_IF165]]:
+; CHECK-NEXT:    [[TMP258:%.*]] = add i64 [[INDEX]], 83
+; CHECK-NEXT:    [[TMP259:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP258]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP259]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE166]]
+; CHECK:       [[PRED_STORE_CONTINUE166]]:
+; CHECK-NEXT:    [[TMP260:%.*]] = extractelement <64 x i1> [[TMP1]], i32 20
+; CHECK-NEXT:    br i1 [[TMP260]], label %[[PRED_STORE_IF167:.*]], label %[[PRED_STORE_CONTINUE168:.*]]
+; CHECK:       [[PRED_STORE_IF167]]:
+; CHECK-NEXT:    [[TMP261:%.*]] = add i64 [[INDEX]], 84
+; CHECK-NEXT:    [[TMP262:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP261]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP262]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE168]]
+; CHECK:       [[PRED_STORE_CONTINUE168]]:
+; CHECK-NEXT:    [[TMP263:%.*]] = extractelement <64 x i1> [[TMP1]], i32 21
+; CHECK-NEXT:    br i1 [[TMP263]], label %[[PRED_STORE_IF169:.*]], label %[[PRED_STORE_CONTINUE170:.*]]
+; CHECK:       [[PRED_STORE_IF169]]:
+; CHECK-NEXT:    [[TMP264:%.*]] = add i64 [[INDEX]], 85
+; CHECK-NEXT:    [[TMP265:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP264]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP265]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE170]]
+; CHECK:       [[PRED_STORE_CONTINUE170]]:
+; CHECK-NEXT:    [[TMP266:%.*]] = extractelement <64 x i1> [[TMP1]], i32 22
+; CHECK-NEXT:    br i1 [[TMP266]], label %[[PRED_STORE_IF171:.*]], label %[[PRED_STORE_CONTINUE172:.*]]
+; CHECK:       [[PRED_STORE_IF171]]:
+; CHECK-NEXT:    [[TMP267:%.*]] = add i64 [[INDEX]], 86
+; CHECK-NEXT:    [[TMP268:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP267]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP268]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE172]]
+; CHECK:       [[PRED_STORE_CONTINUE172]]:
+; CHECK-NEXT:    [[TMP269:%.*]] = extractelement <64 x i1> [[TMP1]], i32 23
+; CHECK-NEXT:    br i1 [[TMP269]], label %[[PRED_STORE_IF173:.*]], label %[[PRED_STORE_CONTINUE174:.*]]
+; CHECK:       [[PRED_STORE_IF173]]:
+; CHECK-NEXT:    [[TMP270:%.*]] = add i64 [[INDEX]], 87
+; CHECK-NEXT:    [[TMP271:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP270]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP271]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE174]]
+; CHECK:       [[PRED_STORE_CONTINUE174]]:
+; CHECK-NEXT:    [[TMP272:%.*]] = extractelement <64 x i1> [[TMP1]], i32 24
+; CHECK-NEXT:    br i1 [[TMP272]], label %[[PRED_STORE_IF175:.*]], label %[[PRED_STORE_CONTINUE176:.*]]
+; CHECK:       [[PRED_STORE_IF175]]:
+; CHECK-NEXT:    [[TMP273:%.*]] = add i64 [[INDEX]], 88
+; CHECK-NEXT:    [[TMP274:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP273]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP274]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE176]]
+; CHECK:       [[PRED_STORE_CONTINUE176]]:
+; CHECK-NEXT:    [[TMP275:%.*]] = extractelement <64 x i1> [[TMP1]], i32 25
+; CHECK-NEXT:    br i1 [[TMP275]], label %[[PRED_STORE_IF177:.*]], label %[[PRED_STORE_CONTINUE178:.*]]
+; CHECK:       [[PRED_STORE_IF177]]:
+; CHECK-NEXT:    [[TMP276:%.*]] = add i64 [[INDEX]], 89
+; CHECK-NEXT:    [[TMP277:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP276]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP277]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE178]]
+; CHECK:       [[PRED_STORE_CONTINUE178]]:
+; CHECK-NEXT:    [[TMP278:%.*]] = extractelement <64 x i1> [[TMP1]], i32 26
+; CHECK-NEXT:    br i1 [[TMP278]], label %[[PRED_STORE_IF179:.*]], label %[[PRED_STORE_CONTINUE180:.*]]
+; CHECK:       [[PRED_STORE_IF179]]:
+; CHECK-NEXT:    [[TMP279:%.*]] = add i64 [[INDEX]], 90
+; CHECK-NEXT:    [[TMP280:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP279]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP280]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE180]]
+; CHECK:       [[PRED_STORE_CONTINUE180]]:
+; CHECK-NEXT:    [[TMP281:%.*]] = extractelement <64 x i1> [[TMP1]], i32 27
+; CHECK-NEXT:    br i1 [[TMP281]], label %[[PRED_STORE_IF181:.*]], label %[[PRED_STORE_CONTINUE182:.*]]
+; CHECK:       [[PRED_STORE_IF181]]:
+; CHECK-NEXT:    [[TMP282:%.*]] = add i64 [[INDEX]], 91
+; CHECK-NEXT:    [[TMP283:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP282]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP283]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE182]]
+; CHECK:       [[PRED_STORE_CONTINUE182]]:
+; CHECK-NEXT:    [[TMP284:%.*]] = extractelement <64 x i1> [[TMP1]], i32 28
+; CHECK-NEXT:    br i1 [[TMP284]], label %[[PRED_STORE_IF183:.*]], label %[[PRED_STORE_CONTINUE184:.*]]
+; CHECK:       [[PRED_STORE_IF183]]:
+; CHECK-NEXT:    [[TMP285:%.*]] = add i64 [[INDEX]], 92
+; CHECK-NEXT:    [[TMP286:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP285]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP286]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE184]]
+; CHECK:       [[PRED_STORE_CONTINUE184]]:
+; CHECK-NEXT:    [[TMP287:%.*]] = extractelement <64 x i1> [[TMP1]], i32 29
+; CHECK-NEXT:    br i1 [[TMP287]], label %[[PRED_STORE_IF185:.*]], label %[[PRED_STORE_CONTINUE186:.*]]
+; CHECK:       [[PRED_STORE_IF185]]:
+; CHECK-NEXT:    [[TMP288:%.*]] = add i64 [[INDEX]], 93
+; CHECK-NEXT:    [[TMP289:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP288]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP289]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE186]]
+; CHECK:       [[PRED_STORE_CONTINUE186]]:
+; CHECK-NEXT:    [[TMP290:%.*]] = extractelement <64 x i1> [[TMP1]], i32 30
+; CHECK-NEXT:    br i1 [[TMP290]], label %[[PRED_STORE_IF187:.*]], label %[[PRED_STORE_CONTINUE188:.*]]
+; CHECK:       [[PRED_STORE_IF187]]:
+; CHECK-NEXT:    [[TMP291:%.*]] = add i64 [[INDEX]], 94
+; CHECK-NEXT:    [[TMP292:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP291]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP292]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE188]]
+; CHECK:       [[PRED_STORE_CONTINUE188]]:
+; CHECK-NEXT:    [[TMP293:%.*]] = extractelement <64 x i1> [[TMP1]], i32 31
+; CHECK-NEXT:    br i1 [[TMP293]], label %[[PRED_STORE_IF189:.*]], label %[[PRED_STORE_CONTINUE190:.*]]
+; CHECK:       [[PRED_STORE_IF189]]:
+; CHECK-NEXT:    [[TMP294:%.*]] = add i64 [[INDEX]], 95
+; CHECK-NEXT:    [[TMP295:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP294]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP295]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE190]]
+; CHECK:       [[PRED_STORE_CONTINUE190]]:
+; CHECK-NEXT:    [[TMP296:%.*]] = extractelement <64 x i1> [[TMP1]], i32 32
+; CHECK-NEXT:    br i1 [[TMP296]], label %[[PRED_STORE_IF191:.*]], label %[[PRED_STORE_CONTINUE192:.*]]
+; CHECK:       [[PRED_STORE_IF191]]:
+; CHECK-NEXT:    [[TMP297:%.*]] = add i64 [[INDEX]], 96
+; CHECK-NEXT:    [[TMP298:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP297]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP298]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE192]]
+; CHECK:       [[PRED_STORE_CONTINUE192]]:
+; CHECK-NEXT:    [[TMP299:%.*]] = extractelement <64 x i1> [[TMP1]], i32 33
+; CHECK-NEXT:    br i1 [[TMP299]], label %[[PRED_STORE_IF193:.*]], label %[[PRED_STORE_CONTINUE194:.*]]
+; CHECK:       [[PRED_STORE_IF193]]:
+; CHECK-NEXT:    [[TMP300:%.*]] = add i64 [[INDEX]], 97
+; CHECK-NEXT:    [[TMP301:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP300]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP301]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE194]]
+; CHECK:       [[PRED_STORE_CONTINUE194]]:
+; CHECK-NEXT:    [[TMP302:%.*]] = extractelement <64 x i1> [[TMP1]], i32 34
+; CHECK-NEXT:    br i1 [[TMP302]], label %[[PRED_STORE_IF195:.*]], label %[[PRED_STORE_CONTINUE196:.*]]
+; CHECK:       [[PRED_STORE_IF195]]:
+; CHECK-NEXT:    [[TMP303:%.*]] = add i64 [[INDEX]], 98
+; CHECK-NEXT:    [[TMP304:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP303]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP304]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE196]]
+; CHECK:       [[PRED_STORE_CONTINUE196]]:
+; CHECK-NEXT:    [[TMP305:%.*]] = extractelement <64 x i1> [[TMP1]], i32 35
+; CHECK-NEXT:    br i1 [[TMP305]], label %[[PRED_STORE_IF197:.*]], label %[[PRED_STORE_CONTINUE198:.*]]
+; CHECK:       [[PRED_STORE_IF197]]:
+; CHECK-NEXT:    [[TMP306:%.*]] = add i64 [[INDEX]], 99
+; CHECK-NEXT:    [[TMP307:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP306]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP307]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE198]]
+; CHECK:       [[PRED_STORE_CONTINUE198]]:
+; CHECK-NEXT:    [[TMP308:%.*]] = extractelement <64 x i1> [[TMP1]], i32 36
+; CHECK-NEXT:    br i1 [[TMP308]], label %[[PRED_STORE_IF199:.*]], label %[[PRED_STORE_CONTINUE200:.*]]
+; CHECK:       [[PRED_STORE_IF199]]:
+; CHECK-NEXT:    [[TMP309:%.*]] = add i64 [[INDEX]], 100
+; CHECK-NEXT:    [[TMP310:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP309]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP310]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE200]]
+; CHECK:       [[PRED_STORE_CONTINUE200]]:
+; CHECK-NEXT:    [[TMP311:%.*]] = extractelement <64 x i1> [[TMP1]], i32 37
+; CHECK-NEXT:    br i1 [[TMP311]], label %[[PRED_STORE_IF201:.*]], label %[[PRED_STORE_CONTINUE202:.*]]
+; CHECK:       [[PRED_STORE_IF201]]:
+; CHECK-NEXT:    [[TMP312:%.*]] = add i64 [[INDEX]], 101
+; CHECK-NEXT:    [[TMP313:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP312]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP313]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE202]]
+; CHECK:       [[PRED_STORE_CONTINUE202]]:
+; CHECK-NEXT:    [[TMP314:%.*]] = extractelement <64 x i1> [[TMP1]], i32 38
+; CHECK-NEXT:    br i1 [[TMP314]], label %[[PRED_STORE_IF203:.*]], label %[[PRED_STORE_CONTINUE204:.*]]
+; CHECK:       [[PRED_STORE_IF203]]:
+; CHECK-NEXT:    [[TMP315:%.*]] = add i64 [[INDEX]], 102
+; CHECK-NEXT:    [[TMP316:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP315]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP316]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE204]]
+; CHECK:       [[PRED_STORE_CONTINUE204]]:
+; CHECK-NEXT:    [[TMP317:%.*]] = extractelement <64 x i1> [[TMP1]], i32 39
+; CHECK-NEXT:    br i1 [[TMP317]], label %[[PRED_STORE_IF205:.*]], label %[[PRED_STORE_CONTINUE206:.*]]
+; CHECK:       [[PRED_STORE_IF205]]:
+; CHECK-NEXT:    [[TMP318:%.*]] = add i64 [[INDEX]], 103
+; CHECK-NEXT:    [[TMP319:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP318]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP319]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE206]]
+; CHECK:       [[PRED_STORE_CONTINUE206]]:
+; CHECK-NEXT:    [[TMP320:%.*]] = extractelement <64 x i1> [[TMP1]], i32 40
+; CHECK-NEXT:    br i1 [[TMP320]], label %[[PRED_STORE_IF207:.*]], label %[[PRED_STORE_CONTINUE208:.*]]
+; CHECK:       [[PRED_STORE_IF207]]:
+; CHECK-NEXT:    [[TMP321:%.*]] = add i64 [[INDEX]], 104
+; CHECK-NEXT:    [[TMP322:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP321]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP322]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE208]]
+; CHECK:       [[PRED_STORE_CONTINUE208]]:
+; CHECK-NEXT:    [[TMP323:%.*]] = extractelement <64 x i1> [[TMP1]], i32 41
+; CHECK-NEXT:    br i1 [[TMP323]], label %[[PRED_STORE_IF209:.*]], label %[[PRED_STORE_CONTINUE210:.*]]
+; CHECK:       [[PRED_STORE_IF209]]:
+; CHECK-NEXT:    [[TMP324:%.*]] = add i64 [[INDEX]], 105
+; CHECK-NEXT:    [[TMP325:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP324]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP325]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE210]]
+; CHECK:       [[PRED_STORE_CONTINUE210]]:
+; CHECK-NEXT:    [[TMP326:%.*]] = extractelement <64 x i1> [[TMP1]], i32 42
+; CHECK-NEXT:    br i1 [[TMP326]], label %[[PRED_STORE_IF211:.*]], label %[[PRED_STORE_CONTINUE212:.*]]
+; CHECK:       [[PRED_STORE_IF211]]:
+; CHECK-NEXT:    [[TMP327:%.*]] = add i64 [[INDEX]], 106
+; CHECK-NEXT:    [[TMP328:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP327]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP328]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE212]]
+; CHECK:       [[PRED_STORE_CONTINUE212]]:
+; CHECK-NEXT:    [[TMP329:%.*]] = extractelement <64 x i1> [[TMP1]], i32 43
+; CHECK-NEXT:    br i1 [[TMP329]], label %[[PRED_STORE_IF213:.*]], label %[[PRED_STORE_CONTINUE214:.*]]
+; CHECK:       [[PRED_STORE_IF213]]:
+; CHECK-NEXT:    [[TMP330:%.*]] = add i64 [[INDEX]], 107
+; CHECK-NEXT:    [[TMP331:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP330]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP331]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE214]]
+; CHECK:       [[PRED_STORE_CONTINUE214]]:
+; CHECK-NEXT:    [[TMP332:%.*]] = extractelement <64 x i1> [[TMP1]], i32 44
+; CHECK-NEXT:    br i1 [[TMP332]], label %[[PRED_STORE_IF215:.*]], label %[[PRED_STORE_CONTINUE216:.*]]
+; CHECK:       [[PRED_STORE_IF215]]:
+; CHECK-NEXT:    [[TMP333:%.*]] = add i64 [[INDEX]], 108
+; CHECK-NEXT:    [[TMP334:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP333]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP334]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE216]]
+; CHECK:       [[PRED_STORE_CONTINUE216]]:
+; CHECK-NEXT:    [[TMP335:%.*]] = extractelement <64 x i1> [[TMP1]], i32 45
+; CHECK-NEXT:    br i1 [[TMP335]], label %[[PRED_STORE_IF217:.*]], label %[[PRED_STORE_CONTINUE218:.*]]
+; CHECK:       [[PRED_STORE_IF217]]:
+; CHECK-NEXT:    [[TMP336:%.*]] = add i64 [[INDEX]], 109
+; CHECK-NEXT:    [[TMP337:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP336]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP337]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE218]]
+; CHECK:       [[PRED_STORE_CONTINUE218]]:
+; CHECK-NEXT:    [[TMP338:%.*]] = extractelement <64 x i1> [[TMP1]], i32 46
+; CHECK-NEXT:    br i1 [[TMP338]], label %[[PRED_STORE_IF219:.*]], label %[[PRED_STORE_CONTINUE220:.*]]
+; CHECK:       [[PRED_STORE_IF219]]:
+; CHECK-NEXT:    [[TMP339:%.*]] = add i64 [[INDEX]], 110
+; CHECK-NEXT:    [[TMP340:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP339]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP340]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE220]]
+; CHECK:       [[PRED_STORE_CONTINUE220]]:
+; CHECK-NEXT:    [[TMP341:%.*]] = extractelement <64 x i1> [[TMP1]], i32 47
+; CHECK-NEXT:    br i1 [[TMP341]], label %[[PRED_STORE_IF221:.*]], label %[[PRED_STORE_CONTINUE222:.*]]
+; CHECK:       [[PRED_STORE_IF221]]:
+; CHECK-NEXT:    [[TMP342:%.*]] = add i64 [[INDEX]], 111
+; CHECK-NEXT:    [[TMP343:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP342]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP343]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE222]]
+; CHECK:       [[PRED_STORE_CONTINUE222]]:
+; CHECK-NEXT:    [[TMP344:%.*]] = extractelement <64 x i1> [[TMP1]], i32 48
+; CHECK-NEXT:    br i1 [[TMP344]], label %[[PRED_STORE_IF223:.*]], label %[[PRED_STORE_CONTINUE224:.*]]
+; CHECK:       [[PRED_STORE_IF223]]:
+; CHECK-NEXT:    [[TMP345:%.*]] = add i64 [[INDEX]], 112
+; CHECK-NEXT:    [[TMP346:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP345]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP346]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE224]]
+; CHECK:       [[PRED_STORE_CONTINUE224]]:
+; CHECK-NEXT:    [[TMP347:%.*]] = extractelement <64 x i1> [[TMP1]], i32 49
+; CHECK-NEXT:    br i1 [[TMP347]], label %[[PRED_STORE_IF225:.*]], label %[[PRED_STORE_CONTINUE226:.*]]
+; CHECK:       [[PRED_STORE_IF225]]:
+; CHECK-NEXT:    [[TMP348:%.*]] = add i64 [[INDEX]], 113
+; CHECK-NEXT:    [[TMP349:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP348]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP349]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE226]]
+; CHECK:       [[PRED_STORE_CONTINUE226]]:
+; CHECK-NEXT:    [[TMP350:%.*]] = extractelement <64 x i1> [[TMP1]], i32 50
+; CHECK-NEXT:    br i1 [[TMP350]], label %[[PRED_STORE_IF227:.*]], label %[[PRED_STORE_CONTINUE228:.*]]
+; CHECK:       [[PRED_STORE_IF227]]:
+; CHECK-NEXT:    [[TMP351:%.*]] = add i64 [[INDEX]], 114
+; CHECK-NEXT:    [[TMP352:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP351]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP352]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE228]]
+; CHECK:       [[PRED_STORE_CONTINUE228]]:
+; CHECK-NEXT:    [[TMP353:%.*]] = extractelement <64 x i1> [[TMP1]], i32 51
+; CHECK-NEXT:    br i1 [[TMP353]], label %[[PRED_STORE_IF229:.*]], label %[[PRED_STORE_CONTINUE230:.*]]
+; CHECK:       [[PRED_STORE_IF229]]:
+; CHECK-NEXT:    [[TMP354:%.*]] = add i64 [[INDEX]], 115
+; CHECK-NEXT:    [[TMP355:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP354]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP355]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE230]]
+; CHECK:       [[PRED_STORE_CONTINUE230]]:
+; CHECK-NEXT:    [[TMP356:%.*]] = extractelement <64 x i1> [[TMP1]], i32 52
+; CHECK-NEXT:    br i1 [[TMP356]], label %[[PRED_STORE_IF231:.*]], label %[[PRED_STORE_CONTINUE232:.*]]
+; CHECK:       [[PRED_STORE_IF231]]:
+; CHECK-NEXT:    [[TMP357:%.*]] = add i64 [[INDEX]], 116
+; CHECK-NEXT:    [[TMP358:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP357]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP358]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE232]]
+; CHECK:       [[PRED_STORE_CONTINUE232]]:
+; CHECK-NEXT:    [[TMP359:%.*]] = extractelement <64 x i1> [[TMP1]], i32 53
+; CHECK-NEXT:    br i1 [[TMP359]], label %[[PRED_STORE_IF233:.*]], label %[[PRED_STORE_CONTINUE234:.*]]
+; CHECK:       [[PRED_STORE_IF233]]:
+; CHECK-NEXT:    [[TMP360:%.*]] = add i64 [[INDEX]], 117
+; CHECK-NEXT:    [[TMP361:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP360]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP361]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE234]]
+; CHECK:       [[PRED_STORE_CONTINUE234]]:
+; CHECK-NEXT:    [[TMP362:%.*]] = extractelement <64 x i1> [[TMP1]], i32 54
+; CHECK-NEXT:    br i1 [[TMP362]], label %[[PRED_STORE_IF235:.*]], label %[[PRED_STORE_CONTINUE236:.*]]
+; CHECK:       [[PRED_STORE_IF235]]:
+; CHECK-NEXT:    [[TMP363:%.*]] = add i64 [[INDEX]], 118
+; CHECK-NEXT:    [[TMP364:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP363]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP364]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE236]]
+; CHECK:       [[PRED_STORE_CONTINUE236]]:
+; CHECK-NEXT:    [[TMP365:%.*]] = extractelement <64 x i1> [[TMP1]], i32 55
+; CHECK-NEXT:    br i1 [[TMP365]], label %[[PRED_STORE_IF237:.*]], label %[[PRED_STORE_CONTINUE238:.*]]
+; CHECK:       [[PRED_STORE_IF237]]:
+; CHECK-NEXT:    [[TMP366:%.*]] = add i64 [[INDEX]], 119
+; CHECK-NEXT:    [[TMP367:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP366]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP367]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE238]]
+; CHECK:       [[PRED_STORE_CONTINUE238]]:
+; CHECK-NEXT:    [[TMP368:%.*]] = extractelement <64 x i1> [[TMP1]], i32 56
+; CHECK-NEXT:    br i1 [[TMP368]], label %[[PRED_STORE_IF239:.*]], label %[[PRED_STORE_CONTINUE240:.*]]
+; CHECK:       [[PRED_STORE_IF239]]:
+; CHECK-NEXT:    [[TMP369:%.*]] = add i64 [[INDEX]], 120
+; CHECK-NEXT:    [[TMP370:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP369]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP370]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE240]]
+; CHECK:       [[PRED_STORE_CONTINUE240]]:
+; CHECK-NEXT:    [[TMP371:%.*]] = extractelement <64 x i1> [[TMP1]], i32 57
+; CHECK-NEXT:    br i1 [[TMP371]], label %[[PRED_STORE_IF241:.*]], label %[[PRED_STORE_CONTINUE242:.*]]
+; CHECK:       [[PRED_STORE_IF241]]:
+; CHECK-NEXT:    [[TMP372:%.*]] = add i64 [[INDEX]], 121
+; CHECK-NEXT:    [[TMP373:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP372]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP373]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE242]]
+; CHECK:       [[PRED_STORE_CONTINUE242]]:
+; CHECK-NEXT:    [[TMP374:%.*]] = extractelement <64 x i1> [[TMP1]], i32 58
+; CHECK-NEXT:    br i1 [[TMP374]], label %[[PRED_STORE_IF243:.*]], label %[[PRED_STORE_CONTINUE244:.*]]
+; CHECK:       [[PRED_STORE_IF243]]:
+; CHECK-NEXT:    [[TMP375:%.*]] = add i64 [[INDEX]], 122
+; CHECK-NEXT:    [[TMP376:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP375]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP376]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE244]]
+; CHECK:       [[PRED_STORE_CONTINUE244]]:
+; CHECK-NEXT:    [[TMP377:%.*]] = extractelement <64 x i1> [[TMP1]], i32 59
+; CHECK-NEXT:    br i1 [[TMP377]], label %[[PRED_STORE_IF245:.*]], label %[[PRED_STORE_CONTINUE246:.*]]
+; CHECK:       [[PRED_STORE_IF245]]:
+; CHECK-NEXT:    [[TMP378:%.*]] = add i64 [[INDEX]], 123
+; CHECK-NEXT:    [[TMP379:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP378]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP379]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE246]]
+; CHECK:       [[PRED_STORE_CONTINUE246]]:
+; CHECK-NEXT:    [[TMP380:%.*]] = extractelement <64 x i1> [[TMP1]], i32 60
+; CHECK-NEXT:    br i1 [[TMP380]], label %[[PRED_STORE_IF247:.*]], label %[[PRED_STORE_CONTINUE248:.*]]
+; CHECK:       [[PRED_STORE_IF247]]:
+; CHECK-NEXT:    [[TMP381:%.*]] = add i64 [[INDEX]], 124
+; CHECK-NEXT:    [[TMP382:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP381]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP382]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE248]]
+; CHECK:       [[PRED_STORE_CONTINUE248]]:
+; CHECK-NEXT:    [[TMP383:%.*]] = extractelement <64 x i1> [[TMP1]], i32 61
+; CHECK-NEXT:    br i1 [[TMP383]], label %[[PRED_STORE_IF249:.*]], label %[[PRED_STORE_CONTINUE250:.*]]
+; CHECK:       [[PRED_STORE_IF249]]:
+; CHECK-NEXT:    [[TMP384:%.*]] = add i64 [[INDEX]], 125
+; CHECK-NEXT:    [[TMP385:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP384]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP385]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE250]]
+; CHECK:       [[PRED_STORE_CONTINUE250]]:
+; CHECK-NEXT:    [[TMP386:%.*]] = extractelement <64 x i1> [[TMP1]], i32 62
+; CHECK-NEXT:    br i1 [[TMP386]], label %[[PRED_STORE_IF251:.*]], label %[[PRED_STORE_CONTINUE252:.*]]
+; CHECK:       [[PRED_STORE_IF251]]:
+; CHECK-NEXT:    [[TMP387:%.*]] = add i64 [[INDEX]], 126
+; CHECK-NEXT:    [[TMP388:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP387]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP388]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE252]]
+; CHECK:       [[PRED_STORE_CONTINUE252]]:
+; CHECK-NEXT:    [[TMP389:%.*]] = extractelement <64 x i1> [[TMP1]], i32 63
+; CHECK-NEXT:    br i1 [[TMP389]], label %[[PRED_STORE_IF253:.*]], label %[[PRED_STORE_CONTINUE254:.*]]
+; CHECK:       [[PRED_STORE_IF253]]:
+; CHECK-NEXT:    [[TMP390:%.*]] = add i64 [[INDEX]], 127
+; CHECK-NEXT:    [[TMP391:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP390]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP391]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE254]]
+; CHECK:       [[PRED_STORE_CONTINUE254]]:
+; CHECK-NEXT:    [[TMP392:%.*]] = extractelement <64 x i1> [[TMP2]], i32 0
+; CHECK-NEXT:    br i1 [[TMP392]], label %[[PRED_STORE_IF255:.*]], label %[[PRED_STORE_CONTINUE256:.*]]
+; CHECK:       [[PRED_STORE_IF255]]:
+; CHECK-NEXT:    [[TMP393:%.*]] = add i64 [[INDEX]], 128
+; CHECK-NEXT:    [[TMP394:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP393]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP394]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE256]]
+; CHECK:       [[PRED_STORE_CONTINUE256]]:
+; CHECK-NEXT:    [[TMP395:%.*]] = extractelement <64 x i1> [[TMP2]], i32 1
+; CHECK-NEXT:    br i1 [[TMP395]], label %[[PRED_STORE_IF257:.*]], label %[[PRED_STORE_CONTINUE258:.*]]
+; CHECK:       [[PRED_STORE_IF257]]:
+; CHECK-NEXT:    [[TMP396:%.*]] = add i64 [[INDEX]], 129
+; CHECK-NEXT:    [[TMP397:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP396]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP397]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE258]]
+; CHECK:       [[PRED_STORE_CONTINUE258]]:
+; CHECK-NEXT:    [[TMP398:%.*]] = extractelement <64 x i1> [[TMP2]], i32 2
+; CHECK-NEXT:    br i1 [[TMP398]], label %[[PRED_STORE_IF259:.*]], label %[[PRED_STORE_CONTINUE260:.*]]
+; CHECK:       [[PRED_STORE_IF259]]:
+; CHECK-NEXT:    [[TMP399:%.*]] = add i64 [[INDEX]], 130
+; CHECK-NEXT:    [[TMP400:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP399]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP400]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE260]]
+; CHECK:       [[PRED_STORE_CONTINUE260]]:
+; CHECK-NEXT:    [[TMP401:%.*]] = extractelement <64 x i1> [[TMP2]], i32 3
+; CHECK-NEXT:    br i1 [[TMP401]], label %[[PRED_STORE_IF261:.*]], label %[[PRED_STORE_CONTINUE262:.*]]
+; CHECK:       [[PRED_STORE_IF261]]:
+; CHECK-NEXT:    [[TMP402:%.*]] = add i64 [[INDEX]], 131
+; CHECK-NEXT:    [[TMP403:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP402]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP403]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE262]]
+; CHECK:       [[PRED_STORE_CONTINUE262]]:
+; CHECK-NEXT:    [[TMP404:%.*]] = extractelement <64 x i1> [[TMP2]], i32 4
+; CHECK-NEXT:    br i1 [[TMP404]], label %[[PRED_STORE_IF263:.*]], label %[[PRED_STORE_CONTINUE264:.*]]
+; CHECK:       [[PRED_STORE_IF263]]:
+; CHECK-NEXT:    [[TMP405:%.*]] = add i64 [[INDEX]], 132
+; CHECK-NEXT:    [[TMP406:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP405]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP406]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE264]]
+; CHECK:       [[PRED_STORE_CONTINUE264]]:
+; CHECK-NEXT:    [[TMP407:%.*]] = extractelement <64 x i1> [[TMP2]], i32 5
+; CHECK-NEXT:    br i1 [[TMP407]], label %[[PRED_STORE_IF265:.*]], label %[[PRED_STORE_CONTINUE266:.*]]
+; CHECK:       [[PRED_STORE_IF265]]:
+; CHECK-NEXT:    [[TMP408:%.*]] = add i64 [[INDEX]], 133
+; CHECK-NEXT:    [[TMP409:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP408]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP409]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE266]]
+; CHECK:       [[PRED_STORE_CONTINUE266]]:
+; CHECK-NEXT:    [[TMP410:%.*]] = extractelement <64 x i1> [[TMP2]], i32 6
+; CHECK-NEXT:    br i1 [[TMP410]], label %[[PRED_STORE_IF267:.*]], label %[[PRED_STORE_CONTINUE268:.*]]
+; CHECK:       [[PRED_STORE_IF267]]:
+; CHECK-NEXT:    [[TMP411:%.*]] = add i64 [[INDEX]], 134
+; CHECK-NEXT:    [[TMP412:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP411]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP412]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE268]]
+; CHECK:       [[PRED_STORE_CONTINUE268]]:
+; CHECK-NEXT:    [[TMP413:%.*]] = extractelement <64 x i1> [[TMP2]], i32 7
+; CHECK-NEXT:    br i1 [[TMP413]], label %[[PRED_STORE_IF269:.*]], label %[[PRED_STORE_CONTINUE270:.*]]
+; CHECK:       [[PRED_STORE_IF269]]:
+; CHECK-NEXT:    [[TMP414:%.*]] = add i64 [[INDEX]], 135
+; CHECK-NEXT:    [[TMP415:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP414]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP415]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE270]]
+; CHECK:       [[PRED_STORE_CONTINUE270]]:
+; CHECK-NEXT:    [[TMP416:%.*]] = extractelement <64 x i1> [[TMP2]], i32 8
+; CHECK-NEXT:    br i1 [[TMP416]], label %[[PRED_STORE_IF271:.*]], label %[[PRED_STORE_CONTINUE272:.*]]
+; CHECK:       [[PRED_STORE_IF271]]:
+; CHECK-NEXT:    [[TMP417:%.*]] = add i64 [[INDEX]], 136
+; CHECK-NEXT:    [[TMP418:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP417]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP418]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE272]]
+; CHECK:       [[PRED_STORE_CONTINUE272]]:
+; CHECK-NEXT:    [[TMP419:%.*]] = extractelement <64 x i1> [[TMP2]], i32 9
+; CHECK-NEXT:    br i1 [[TMP419]], label %[[PRED_STORE_IF273:.*]], label %[[PRED_STORE_CONTINUE274:.*]]
+; CHECK:       [[PRED_STORE_IF273]]:
+; CHECK-NEXT:    [[TMP420:%.*]] = add i64 [[INDEX]], 137
+; CHECK-NEXT:    [[TMP421:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP420]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP421]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE274]]
+; CHECK:       [[PRED_STORE_CONTINUE274]]:
+; CHECK-NEXT:    [[TMP422:%.*]] = extractelement <64 x i1> [[TMP2]], i32 10
+; CHECK-NEXT:    br i1 [[TMP422]], label %[[PRED_STORE_IF275:.*]], label %[[PRED_STORE_CONTINUE276:.*]]
+; CHECK:       [[PRED_STORE_IF275]]:
+; CHECK-NEXT:    [[TMP423:%.*]] = add i64 [[INDEX]], 138
+; CHECK-NEXT:    [[TMP424:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP423]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP424]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE276]]
+; CHECK:       [[PRED_STORE_CONTINUE276]]:
+; CHECK-NEXT:    [[TMP425:%.*]] = extractelement <64 x i1> [[TMP2]], i32 11
+; CHECK-NEXT:    br i1 [[TMP425]], label %[[PRED_STORE_IF277:.*]], label %[[PRED_STORE_CONTINUE278:.*]]
+; CHECK:       [[PRED_STORE_IF277]]:
+; CHECK-NEXT:    [[TMP426:%.*]] = add i64 [[INDEX]], 139
+; CHECK-NEXT:    [[TMP427:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP426]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP427]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE278]]
+; CHECK:       [[PRED_STORE_CONTINUE278]]:
+; CHECK-NEXT:    [[TMP428:%.*]] = extractelement <64 x i1> [[TMP2]], i32 12
+; CHECK-NEXT:    br i1 [[TMP428]], label %[[PRED_STORE_IF279:.*]], label %[[PRED_STORE_CONTINUE280:.*]]
+; CHECK:       [[PRED_STORE_IF279]]:
+; CHECK-NEXT:    [[TMP429:%.*]] = add i64 [[INDEX]], 140
+; CHECK-NEXT:    [[TMP430:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP429]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP430]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE280]]
+; CHECK:       [[PRED_STORE_CONTINUE280]]:
+; CHECK-NEXT:    [[TMP431:%.*]] = extractelement <64 x i1> [[TMP2]], i32 13
+; CHECK-NEXT:    br i1 [[TMP431]], label %[[PRED_STORE_IF281:.*]], label %[[PRED_STORE_CONTINUE282:.*]]
+; CHECK:       [[PRED_STORE_IF281]]:
+; CHECK-NEXT:    [[TMP432:%.*]] = add i64 [[INDEX]], 141
+; CHECK-NEXT:    [[TMP433:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP432]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP433]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE282]]
+; CHECK:       [[PRED_STORE_CONTINUE282]]:
+; CHECK-NEXT:    [[TMP434:%.*]] = extractelement <64 x i1> [[TMP2]], i32 14
+; CHECK-NEXT:    br i1 [[TMP434]], label %[[PRED_STORE_IF283:.*]], label %[[PRED_STORE_CONTINUE284:.*]]
+; CHECK:       [[PRED_STORE_IF283]]:
+; CHECK-NEXT:    [[TMP435:%.*]] = add i64 [[INDEX]], 142
+; CHECK-NEXT:    [[TMP436:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP435]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP436]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE284]]
+; CHECK:       [[PRED_STORE_CONTINUE284]]:
+; CHECK-NEXT:    [[TMP437:%.*]] = extractelement <64 x i1> [[TMP2]], i32 15
+; CHECK-NEXT:    br i1 [[TMP437]], label %[[PRED_STORE_IF285:.*]], label %[[PRED_STORE_CONTINUE286:.*]]
+; CHECK:       [[PRED_STORE_IF285]]:
+; CHECK-NEXT:    [[TMP438:%.*]] = add i64 [[INDEX]], 143
+; CHECK-NEXT:    [[TMP439:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP438]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP439]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE286]]
+; CHECK:       [[PRED_STORE_CONTINUE286]]:
+; CHECK-NEXT:    [[TMP440:%.*]] = extractelement <64 x i1> [[TMP2]], i32 16
+; CHECK-NEXT:    br i1 [[TMP440]], label %[[PRED_STORE_IF287:.*]], label %[[PRED_STORE_CONTINUE288:.*]]
+; CHECK:       [[PRED_STORE_IF287]]:
+; CHECK-NEXT:    [[TMP441:%.*]] = add i64 [[INDEX]], 144
+; CHECK-NEXT:    [[TMP442:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP441]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP442]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE288]]
+; CHECK:       [[PRED_STORE_CONTINUE288]]:
+; CHECK-NEXT:    [[TMP443:%.*]] = extractelement <64 x i1> [[TMP2]], i32 17
+; CHECK-NEXT:    br i1 [[TMP443]], label %[[PRED_STORE_IF289:.*]], label %[[PRED_STORE_CONTINUE290:.*]]
+; CHECK:       [[PRED_STORE_IF289]]:
+; CHECK-NEXT:    [[TMP444:%.*]] = add i64 [[INDEX]], 145
+; CHECK-NEXT:    [[TMP445:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP444]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP445]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE290]]
+; CHECK:       [[PRED_STORE_CONTINUE290]]:
+; CHECK-NEXT:    [[TMP446:%.*]] = extractelement <64 x i1> [[TMP2]], i32 18
+; CHECK-NEXT:    br i1 [[TMP446]], label %[[PRED_STORE_IF291:.*]], label %[[PRED_STORE_CONTINUE292:.*]]
+; CHECK:       [[PRED_STORE_IF291]]:
+; CHECK-NEXT:    [[TMP447:%.*]] = add i64 [[INDEX]], 146
+; CHECK-NEXT:    [[TMP448:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP447]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP448]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE292]]
+; CHECK:       [[PRED_STORE_CONTINUE292]]:
+; CHECK-NEXT:    [[TMP449:%.*]] = extractelement <64 x i1> [[TMP2]], i32 19
+; CHECK-NEXT:    br i1 [[TMP449]], label %[[PRED_STORE_IF293:.*]], label %[[PRED_STORE_CONTINUE294:.*]]
+; CHECK:       [[PRED_STORE_IF293]]:
+; CHECK-NEXT:    [[TMP450:%.*]] = add i64 [[INDEX]], 147
+; CHECK-NEXT:    [[TMP451:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP450]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP451]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE294]]
+; CHECK:       [[PRED_STORE_CONTINUE294]]:
+; CHECK-NEXT:    [[TMP452:%.*]] = extractelement <64 x i1> [[TMP2]], i32 20
+; CHECK-NEXT:    br i1 [[TMP452]], label %[[PRED_STORE_IF295:.*]], label %[[PRED_STORE_CONTINUE296:.*]]
+; CHECK:       [[PRED_STORE_IF295]]:
+; CHECK-NEXT:    [[TMP453:%.*]] = add i64 [[INDEX]], 148
+; CHECK-NEXT:    [[TMP454:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP453]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP454]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE296]]
+; CHECK:       [[PRED_STORE_CONTINUE296]]:
+; CHECK-NEXT:    [[TMP455:%.*]] = extractelement <64 x i1> [[TMP2]], i32 21
+; CHECK-NEXT:    br i1 [[TMP455]], label %[[PRED_STORE_IF297:.*]], label %[[PRED_STORE_CONTINUE298:.*]]
+; CHECK:       [[PRED_STORE_IF297]]:
+; CHECK-NEXT:    [[TMP456:%.*]] = add i64 [[INDEX]], 149
+; CHECK-NEXT:    [[TMP457:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP456]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP457]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE298]]
+; CHECK:       [[PRED_STORE_CONTINUE298]]:
+; CHECK-NEXT:    [[TMP458:%.*]] = extractelement <64 x i1> [[TMP2]], i32 22
+; CHECK-NEXT:    br i1 [[TMP458]], label %[[PRED_STORE_IF299:.*]], label %[[PRED_STORE_CONTINUE300:.*]]
+; CHECK:       [[PRED_STORE_IF299]]:
+; CHECK-NEXT:    [[TMP459:%.*]] = add i64 [[INDEX]], 150
+; CHECK-NEXT:    [[TMP460:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP459]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP460]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE300]]
+; CHECK:       [[PRED_STORE_CONTINUE300]]:
+; CHECK-NEXT:    [[TMP461:%.*]] = extractelement <64 x i1> [[TMP2]], i32 23
+; CHECK-NEXT:    br i1 [[TMP461]], label %[[PRED_STORE_IF301:.*]], label %[[PRED_STORE_CONTINUE302:.*]]
+; CHECK:       [[PRED_STORE_IF301]]:
+; CHECK-NEXT:    [[TMP462:%.*]] = add i64 [[INDEX]], 151
+; CHECK-NEXT:    [[TMP463:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP462]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP463]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE302]]
+; CHECK:       [[PRED_STORE_CONTINUE302]]:
+; CHECK-NEXT:    [[TMP464:%.*]] = extractelement <64 x i1> [[TMP2]], i32 24
+; CHECK-NEXT:    br i1 [[TMP464]], label %[[PRED_STORE_IF303:.*]], label %[[PRED_STORE_CONTINUE304:.*]]
+; CHECK:       [[PRED_STORE_IF303]]:
+; CHECK-NEXT:    [[TMP465:%.*]] = add i64 [[INDEX]], 152
+; CHECK-NEXT:    [[TMP466:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP465]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP466]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE304]]
+; CHECK:       [[PRED_STORE_CONTINUE304]]:
+; CHECK-NEXT:    [[TMP467:%.*]] = extractelement <64 x i1> [[TMP2]], i32 25
+; CHECK-NEXT:    br i1 [[TMP467]], label %[[PRED_STORE_IF305:.*]], label %[[PRED_STORE_CONTINUE306:.*]]
+; CHECK:       [[PRED_STORE_IF305]]:
+; CHECK-NEXT:    [[TMP468:%.*]] = add i64 [[INDEX]], 153
+; CHECK-NEXT:    [[TMP469:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP468]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP469]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE306]]
+; CHECK:       [[PRED_STORE_CONTINUE306]]:
+; CHECK-NEXT:    [[TMP470:%.*]] = extractelement <64 x i1> [[TMP2]], i32 26
+; CHECK-NEXT:    br i1 [[TMP470]], label %[[PRED_STORE_IF307:.*]], label %[[PRED_STORE_CONTINUE308:.*]]
+; CHECK:       [[PRED_STORE_IF307]]:
+; CHECK-NEXT:    [[TMP471:%.*]] = add i64 [[INDEX]], 154
+; CHECK-NEXT:    [[TMP472:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP471]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP472]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE308]]
+; CHECK:       [[PRED_STORE_CONTINUE308]]:
+; CHECK-NEXT:    [[TMP473:%.*]] = extractelement <64 x i1> [[TMP2]], i32 27
+; CHECK-NEXT:    br i1 [[TMP473]], label %[[PRED_STORE_IF309:.*]], label %[[PRED_STORE_CONTINUE310:.*]]
+; CHECK:       [[PRED_STORE_IF309]]:
+; CHECK-NEXT:    [[TMP474:%.*]] = add i64 [[INDEX]], 155
+; CHECK-NEXT:    [[TMP475:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP474]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP475]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE310]]
+; CHECK:       [[PRED_STORE_CONTINUE310]]:
+; CHECK-NEXT:    [[TMP476:%.*]] = extractelement <64 x i1> [[TMP2]], i32 28
+; CHECK-NEXT:    br i1 [[TMP476]], label %[[PRED_STORE_IF311:.*]], label %[[PRED_STORE_CONTINUE312:.*]]
+; CHECK:       [[PRED_STORE_IF311]]:
+; CHECK-NEXT:    [[TMP477:%.*]] = add i64 [[INDEX]], 156
+; CHECK-NEXT:    [[TMP478:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP477]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP478]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE312]]
+; CHECK:       [[PRED_STORE_CONTINUE312]]:
+; CHECK-NEXT:    [[TMP479:%.*]] = extractelement <64 x i1> [[TMP2]], i32 29
+; CHECK-NEXT:    br i1 [[TMP479]], label %[[PRED_STORE_IF313:.*]], label %[[PRED_STORE_CONTINUE314:.*]]
+; CHECK:       [[PRED_STORE_IF313]]:
+; CHECK-NEXT:    [[TMP480:%.*]] = add i64 [[INDEX]], 157
+; CHECK-NEXT:    [[TMP481:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP480]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP481]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE314]]
+; CHECK:       [[PRED_STORE_CONTINUE314]]:
+; CHECK-NEXT:    [[TMP482:%.*]] = extractelement <64 x i1> [[TMP2]], i32 30
+; CHECK-NEXT:    br i1 [[TMP482]], label %[[PRED_STORE_IF315:.*]], label %[[PRED_STORE_CONTINUE316:.*]]
+; CHECK:       [[PRED_STORE_IF315]]:
+; CHECK-NEXT:    [[TMP483:%.*]] = add i64 [[INDEX]], 158
+; CHECK-NEXT:    [[TMP484:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP483]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP484]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE316]]
+; CHECK:       [[PRED_STORE_CONTINUE316]]:
+; CHECK-NEXT:    [[TMP485:%.*]] = extractelement <64 x i1> [[TMP2]], i32 31
+; CHECK-NEXT:    br i1 [[TMP485]], label %[[PRED_STORE_IF317:.*]], label %[[PRED_STORE_CONTINUE318:.*]]
+; CHECK:       [[PRED_STORE_IF317]]:
+; CHECK-NEXT:    [[TMP486:%.*]] = add i64 [[INDEX]], 159
+; CHECK-NEXT:    [[TMP487:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP486]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP487]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE318]]
+; CHECK:       [[PRED_STORE_CONTINUE318]]:
+; CHECK-NEXT:    [[TMP488:%.*]] = extractelement <64 x i1> [[TMP2]], i32 32
+; CHECK-NEXT:    br i1 [[TMP488]], label %[[PRED_STORE_IF319:.*]], label %[[PRED_STORE_CONTINUE320:.*]]
+; CHECK:       [[PRED_STORE_IF319]]:
+; CHECK-NEXT:    [[TMP489:%.*]] = add i64 [[INDEX]], 160
+; CHECK-NEXT:    [[TMP490:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP489]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP490]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE320]]
+; CHECK:       [[PRED_STORE_CONTINUE320]]:
+; CHECK-NEXT:    [[TMP491:%.*]] = extractelement <64 x i1> [[TMP2]], i32 33
+; CHECK-NEXT:    br i1 [[TMP491]], label %[[PRED_STORE_IF321:.*]], label %[[PRED_STORE_CONTINUE322:.*]]
+; CHECK:       [[PRED_STORE_IF321]]:
+; CHECK-NEXT:    [[TMP492:%.*]] = add i64 [[INDEX]], 161
+; CHECK-NEXT:    [[TMP493:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP492]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP493]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE322]]
+; CHECK:       [[PRED_STORE_CONTINUE322]]:
+; CHECK-NEXT:    [[TMP494:%.*]] = extractelement <64 x i1> [[TMP2]], i32 34
+; CHECK-NEXT:    br i1 [[TMP494]], label %[[PRED_STORE_IF323:.*]], label %[[PRED_STORE_CONTINUE324:.*]]
+; CHECK:       [[PRED_STORE_IF323]]:
+; CHECK-NEXT:    [[TMP495:%.*]] = add i64 [[INDEX]], 162
+; CHECK-NEXT:    [[TMP496:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP495]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP496]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE324]]
+; CHECK:       [[PRED_STORE_CONTINUE324]]:
+; CHECK-NEXT:    [[TMP497:%.*]] = extractelement <64 x i1> [[TMP2]], i32 35
+; CHECK-NEXT:    br i1 [[TMP497]], label %[[PRED_STORE_IF325:.*]], label %[[PRED_STORE_CONTINUE326:.*]]
+; CHECK:       [[PRED_STORE_IF325]]:
+; CHECK-NEXT:    [[TMP498:%.*]] = add i64 [[INDEX]], 163
+; CHECK-NEXT:    [[TMP499:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP498]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP499]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE326]]
+; CHECK:       [[PRED_STORE_CONTINUE326]]:
+; CHECK-NEXT:    [[TMP500:%.*]] = extractelement <64 x i1> [[TMP2]], i32 36
+; CHECK-NEXT:    br i1 [[TMP500]], label %[[PRED_STORE_IF327:.*]], label %[[PRED_STORE_CONTINUE328:.*]]
+; CHECK:       [[PRED_STORE_IF327]]:
+; CHECK-NEXT:    [[TMP501:%.*]] = add i64 [[INDEX]], 164
+; CHECK-NEXT:    [[TMP502:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP501]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP502]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE328]]
+; CHECK:       [[PRED_STORE_CONTINUE328]]:
+; CHECK-NEXT:    [[TMP503:%.*]] = extractelement <64 x i1> [[TMP2]], i32 37
+; CHECK-NEXT:    br i1 [[TMP503]], label %[[PRED_STORE_IF329:.*]], label %[[PRED_STORE_CONTINUE330:.*]]
+; CHECK:       [[PRED_STORE_IF329]]:
+; CHECK-NEXT:    [[TMP504:%.*]] = add i64 [[INDEX]], 165
+; CHECK-NEXT:    [[TMP505:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP504]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP505]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE330]]
+; CHECK:       [[PRED_STORE_CONTINUE330]]:
+; CHECK-NEXT:    [[TMP506:%.*]] = extractelement <64 x i1> [[TMP2]], i32 38
+; CHECK-NEXT:    br i1 [[TMP506]], label %[[PRED_STORE_IF331:.*]], label %[[PRED_STORE_CONTINUE332:.*]]
+; CHECK:       [[PRED_STORE_IF331]]:
+; CHECK-NEXT:    [[TMP507:%.*]] = add i64 [[INDEX]], 166
+; CHECK-NEXT:    [[TMP508:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP507]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP508]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE332]]
+; CHECK:       [[PRED_STORE_CONTINUE332]]:
+; CHECK-NEXT:    [[TMP509:%.*]] = extractelement <64 x i1> [[TMP2]], i32 39
+; CHECK-NEXT:    br i1 [[TMP509]], label %[[PRED_STORE_IF333:.*]], label %[[PRED_STORE_CONTINUE334:.*]]
+; CHECK:       [[PRED_STORE_IF333]]:
+; CHECK-NEXT:    [[TMP510:%.*]] = add i64 [[INDEX]], 167
+; CHECK-NEXT:    [[TMP511:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP510]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP511]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE334]]
+; CHECK:       [[PRED_STORE_CONTINUE334]]:
+; CHECK-NEXT:    [[TMP512:%.*]] = extractelement <64 x i1> [[TMP2]], i32 40
+; CHECK-NEXT:    br i1 [[TMP512]], label %[[PRED_STORE_IF335:.*]], label %[[PRED_STORE_CONTINUE336:.*]]
+; CHECK:       [[PRED_STORE_IF335]]:
+; CHECK-NEXT:    [[TMP513:%.*]] = add i64 [[INDEX]], 168
+; CHECK-NEXT:    [[TMP514:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP513]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP514]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE336]]
+; CHECK:       [[PRED_STORE_CONTINUE336]]:
+; CHECK-NEXT:    [[TMP515:%.*]] = extractelement <64 x i1> [[TMP2]], i32 41
+; CHECK-NEXT:    br i1 [[TMP515]], label %[[PRED_STORE_IF337:.*]], label %[[PRED_STORE_CONTINUE338:.*]]
+; CHECK:       [[PRED_STORE_IF337]]:
+; CHECK-NEXT:    [[TMP516:%.*]] = add i64 [[INDEX]], 169
+; CHECK-NEXT:    [[TMP517:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP516]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP517]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE338]]
+; CHECK:       [[PRED_STORE_CONTINUE338]]:
+; CHECK-NEXT:    [[TMP518:%.*]] = extractelement <64 x i1> [[TMP2]], i32 42
+; CHECK-NEXT:    br i1 [[TMP518]], label %[[PRED_STORE_IF339:.*]], label %[[PRED_STORE_CONTINUE340:.*]]
+; CHECK:       [[PRED_STORE_IF339]]:
+; CHECK-NEXT:    [[TMP519:%.*]] = add i64 [[INDEX]], 170
+; CHECK-NEXT:    [[TMP520:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP519]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP520]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE340]]
+; CHECK:       [[PRED_STORE_CONTINUE340]]:
+; CHECK-NEXT:    [[TMP521:%.*]] = extractelement <64 x i1> [[TMP2]], i32 43
+; CHECK-NEXT:    br i1 [[TMP521]], label %[[PRED_STORE_IF341:.*]], label %[[PRED_STORE_CONTINUE342:.*]]
+; CHECK:       [[PRED_STORE_IF341]]:
+; CHECK-NEXT:    [[TMP522:%.*]] = add i64 [[INDEX]], 171
+; CHECK-NEXT:    [[TMP523:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP522]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP523]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE342]]
+; CHECK:       [[PRED_STORE_CONTINUE342]]:
+; CHECK-NEXT:    [[TMP524:%.*]] = extractelement <64 x i1> [[TMP2]], i32 44
+; CHECK-NEXT:    br i1 [[TMP524]], label %[[PRED_STORE_IF343:.*]], label %[[PRED_STORE_CONTINUE344:.*]]
+; CHECK:       [[PRED_STORE_IF343]]:
+; CHECK-NEXT:    [[TMP525:%.*]] = add i64 [[INDEX]], 172
+; CHECK-NEXT:    [[TMP526:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP525]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP526]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE344]]
+; CHECK:       [[PRED_STORE_CONTINUE344]]:
+; CHECK-NEXT:    [[TMP527:%.*]] = extractelement <64 x i1> [[TMP2]], i32 45
+; CHECK-NEXT:    br i1 [[TMP527]], label %[[PRED_STORE_IF345:.*]], label %[[PRED_STORE_CONTINUE346:.*]]
+; CHECK:       [[PRED_STORE_IF345]]:
+; CHECK-NEXT:    [[TMP528:%.*]] = add i64 [[INDEX]], 173
+; CHECK-NEXT:    [[TMP529:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP528]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP529]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE346]]
+; CHECK:       [[PRED_STORE_CONTINUE346]]:
+; CHECK-NEXT:    [[TMP530:%.*]] = extractelement <64 x i1> [[TMP2]], i32 46
+; CHECK-NEXT:    br i1 [[TMP530]], label %[[PRED_STORE_IF347:.*]], label %[[PRED_STORE_CONTINUE348:.*]]
+; CHECK:       [[PRED_STORE_IF347]]:
+; CHECK-NEXT:    [[TMP531:%.*]] = add i64 [[INDEX]], 174
+; CHECK-NEXT:    [[TMP532:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP531]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP532]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE348]]
+; CHECK:       [[PRED_STORE_CONTINUE348]]:
+; CHECK-NEXT:    [[TMP533:%.*]] = extractelement <64 x i1> [[TMP2]], i32 47
+; CHECK-NEXT:    br i1 [[TMP533]], label %[[PRED_STORE_IF349:.*]], label %[[PRED_STORE_CONTINUE350:.*]]
+; CHECK:       [[PRED_STORE_IF349]]:
+; CHECK-NEXT:    [[TMP534:%.*]] = add i64 [[INDEX]], 175
+; CHECK-NEXT:    [[TMP535:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP534]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP535]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE350]]
+; CHECK:       [[PRED_STORE_CONTINUE350]]:
+; CHECK-NEXT:    [[TMP536:%.*]] = extractelement <64 x i1> [[TMP2]], i32 48
+; CHECK-NEXT:    br i1 [[TMP536]], label %[[PRED_STORE_IF351:.*]], label %[[PRED_STORE_CONTINUE352:.*]]
+; CHECK:       [[PRED_STORE_IF351]]:
+; CHECK-NEXT:    [[TMP537:%.*]] = add i64 [[INDEX]], 176
+; CHECK-NEXT:    [[TMP538:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP537]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP538]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE352]]
+; CHECK:       [[PRED_STORE_CONTINUE352]]:
+; CHECK-NEXT:    [[TMP539:%.*]] = extractelement <64 x i1> [[TMP2]], i32 49
+; CHECK-NEXT:    br i1 [[TMP539]], label %[[PRED_STORE_IF353:.*]], label %[[PRED_STORE_CONTINUE354:.*]]
+; CHECK:       [[PRED_STORE_IF353]]:
+; CHECK-NEXT:    [[TMP540:%.*]] = add i64 [[INDEX]], 177
+; CHECK-NEXT:    [[TMP541:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP540]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP541]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE354]]
+; CHECK:       [[PRED_STORE_CONTINUE354]]:
+; CHECK-NEXT:    [[TMP542:%.*]] = extractelement <64 x i1> [[TMP2]], i32 50
+; CHECK-NEXT:    br i1 [[TMP542]], label %[[PRED_STORE_IF355:.*]], label %[[PRED_STORE_CONTINUE356:.*]]
+; CHECK:       [[PRED_STORE_IF355]]:
+; CHECK-NEXT:    [[TMP543:%.*]] = add i64 [[INDEX]], 178
+; CHECK-NEXT:    [[TMP544:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP543]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP544]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE356]]
+; CHECK:       [[PRED_STORE_CONTINUE356]]:
+; CHECK-NEXT:    [[TMP545:%.*]] = extractelement <64 x i1> [[TMP2]], i32 51
+; CHECK-NEXT:    br i1 [[TMP545]], label %[[PRED_STORE_IF357:.*]], label %[[PRED_STORE_CONTINUE358:.*]]
+; CHECK:       [[PRED_STORE_IF357]]:
+; CHECK-NEXT:    [[TMP546:%.*]] = add i64 [[INDEX]], 179
+; CHECK-NEXT:    [[TMP547:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP546]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP547]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE358]]
+; CHECK:       [[PRED_STORE_CONTINUE358]]:
+; CHECK-NEXT:    [[TMP548:%.*]] = extractelement <64 x i1> [[TMP2]], i32 52
+; CHECK-NEXT:    br i1 [[TMP548]], label %[[PRED_STORE_IF359:.*]], label %[[PRED_STORE_CONTINUE360:.*]]
+; CHECK:       [[PRED_STORE_IF359]]:
+; CHECK-NEXT:    [[TMP549:%.*]] = add i64 [[INDEX]], 180
+; CHECK-NEXT:    [[TMP550:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP549]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP550]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE360]]
+; CHECK:       [[PRED_STORE_CONTINUE360]]:
+; CHECK-NEXT:    [[TMP551:%.*]] = extractelement <64 x i1> [[TMP2]], i32 53
+; CHECK-NEXT:    br i1 [[TMP551]], label %[[PRED_STORE_IF361:.*]], label %[[PRED_STORE_CONTINUE362:.*]]
+; CHECK:       [[PRED_STORE_IF361]]:
+; CHECK-NEXT:    [[TMP552:%.*]] = add i64 [[INDEX]], 181
+; CHECK-NEXT:    [[TMP553:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP552]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP553]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE362]]
+; CHECK:       [[PRED_STORE_CONTINUE362]]:
+; CHECK-NEXT:    [[TMP554:%.*]] = extractelement <64 x i1> [[TMP2]], i32 54
+; CHECK-NEXT:    br i1 [[TMP554]], label %[[PRED_STORE_IF363:.*]], label %[[PRED_STORE_CONTINUE364:.*]]
+; CHECK:       [[PRED_STORE_IF363]]:
+; CHECK-NEXT:    [[TMP555:%.*]] = add i64 [[INDEX]], 182
+; CHECK-NEXT:    [[TMP556:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP555]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP556]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE364]]
+; CHECK:       [[PRED_STORE_CONTINUE364]]:
+; CHECK-NEXT:    [[TMP557:%.*]] = extractelement <64 x i1> [[TMP2]], i32 55
+; CHECK-NEXT:    br i1 [[TMP557]], label %[[PRED_STORE_IF365:.*]], label %[[PRED_STORE_CONTINUE366:.*]]
+; CHECK:       [[PRED_STORE_IF365]]:
+; CHECK-NEXT:    [[TMP558:%.*]] = add i64 [[INDEX]], 183
+; CHECK-NEXT:    [[TMP559:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP558]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP559]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE366]]
+; CHECK:       [[PRED_STORE_CONTINUE366]]:
+; CHECK-NEXT:    [[TMP560:%.*]] = extractelement <64 x i1> [[TMP2]], i32 56
+; CHECK-NEXT:    br i1 [[TMP560]], label %[[PRED_STORE_IF367:.*]], label %[[PRED_STORE_CONTINUE368:.*]]
+; CHECK:       [[PRED_STORE_IF367]]:
+; CHECK-NEXT:    [[TMP561:%.*]] = add i64 [[INDEX]], 184
+; CHECK-NEXT:    [[TMP562:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP561]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP562]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE368]]
+; CHECK:       [[PRED_STORE_CONTINUE368]]:
+; CHECK-NEXT:    [[TMP563:%.*]] = extractelement <64 x i1> [[TMP2]], i32 57
+; CHECK-NEXT:    br i1 [[TMP563]], label %[[PRED_STORE_IF369:.*]], label %[[PRED_STORE_CONTINUE370:.*]]
+; CHECK:       [[PRED_STORE_IF369]]:
+; CHECK-NEXT:    [[TMP564:%.*]] = add i64 [[INDEX]], 185
+; CHECK-NEXT:    [[TMP565:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP564]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP565]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE370]]
+; CHECK:       [[PRED_STORE_CONTINUE370]]:
+; CHECK-NEXT:    [[TMP566:%.*]] = extractelement <64 x i1> [[TMP2]], i32 58
+; CHECK-NEXT:    br i1 [[TMP566]], label %[[PRED_STORE_IF371:.*]], label %[[PRED_STORE_CONTINUE372:.*]]
+; CHECK:       [[PRED_STORE_IF371]]:
+; CHECK-NEXT:    [[TMP567:%.*]] = add i64 [[INDEX]], 186
+; CHECK-NEXT:    [[TMP568:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP567]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP568]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE372]]
+; CHECK:       [[PRED_STORE_CONTINUE372]]:
+; CHECK-NEXT:    [[TMP569:%.*]] = extractelement <64 x i1> [[TMP2]], i32 59
+; CHECK-NEXT:    br i1 [[TMP569]], label %[[PRED_STORE_IF373:.*]], label %[[PRED_STORE_CONTINUE374:.*]]
+; CHECK:       [[PRED_STORE_IF373]]:
+; CHECK-NEXT:    [[TMP570:%.*]] = add i64 [[INDEX]], 187
+; CHECK-NEXT:    [[TMP571:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP570]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP571]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE374]]
+; CHECK:       [[PRED_STORE_CONTINUE374]]:
+; CHECK-NEXT:    [[TMP572:%.*]] = extractelement <64 x i1> [[TMP2]], i32 60
+; CHECK-NEXT:    br i1 [[TMP572]], label %[[PRED_STORE_IF375:.*]], label %[[PRED_STORE_CONTINUE376:.*]]
+; CHECK:       [[PRED_STORE_IF375]]:
+; CHECK-NEXT:    [[TMP573:%.*]] = add i64 [[INDEX]], 188
+; CHECK-NEXT:    [[TMP574:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP573]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP574]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE376]]
+; CHECK:       [[PRED_STORE_CONTINUE376]]:
+; CHECK-NEXT:    [[TMP575:%.*]] = extractelement <64 x i1> [[TMP2]], i32 61
+; CHECK-NEXT:    br i1 [[TMP575]], label %[[PRED_STORE_IF377:.*]], label %[[PRED_STORE_CONTINUE378:.*]]
+; CHECK:       [[PRED_STORE_IF377]]:
+; CHECK-NEXT:    [[TMP576:%.*]] = add i64 [[INDEX]], 189
+; CHECK-NEXT:    [[TMP577:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP576]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP577]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE378]]
+; CHECK:       [[PRED_STORE_CONTINUE378]]:
+; CHECK-NEXT:    [[TMP578:%.*]] = extractelement <64 x i1> [[TMP2]], i32 62
+; CHECK-NEXT:    br i1 [[TMP578]], label %[[PRED_STORE_IF379:.*]], label %[[PRED_STORE_CONTINUE380:.*]]
+; CHECK:       [[PRED_STORE_IF379]]:
+; CHECK-NEXT:    [[TMP579:%.*]] = add i64 [[INDEX]], 190
+; CHECK-NEXT:    [[TMP580:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP579]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP580]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE380]]
+; CHECK:       [[PRED_STORE_CONTINUE380]]:
+; CHECK-NEXT:    [[TMP581:%.*]] = extractelement <64 x i1> [[TMP2]], i32 63
+; CHECK-NEXT:    br i1 [[TMP581]], label %[[PRED_STORE_IF381:.*]], label %[[PRED_STORE_CONTINUE382:.*]]
+; CHECK:       [[PRED_STORE_IF381]]:
+; CHECK-NEXT:    [[TMP582:%.*]] = add i64 [[INDEX]], 191
+; CHECK-NEXT:    [[TMP583:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP582]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP583]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE382]]
+; CHECK:       [[PRED_STORE_CONTINUE382]]:
+; CHECK-NEXT:    [[TMP584:%.*]] = extractelement <64 x i1> [[TMP3]], i32 0
+; CHECK-NEXT:    br i1 [[TMP584]], label %[[PRED_STORE_IF383:.*]], label %[[PRED_STORE_CONTINUE384:.*]]
+; CHECK:       [[PRED_STORE_IF383]]:
+; CHECK-NEXT:    [[TMP585:%.*]] = add i64 [[INDEX]], 192
+; CHECK-NEXT:    [[TMP586:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP585]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP586]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE384]]
+; CHECK:       [[PRED_STORE_CONTINUE384]]:
+; CHECK-NEXT:    [[TMP587:%.*]] = extractelement <64 x i1> [[TMP3]], i32 1
+; CHECK-NEXT:    br i1 [[TMP587]], label %[[PRED_STORE_IF385:.*]], label %[[PRED_STORE_CONTINUE386:.*]]
+; CHECK:       [[PRED_STORE_IF385]]:
+; CHECK-NEXT:    [[TMP588:%.*]] = add i64 [[INDEX]], 193
+; CHECK-NEXT:    [[TMP589:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP588]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP589]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE386]]
+; CHECK:       [[PRED_STORE_CONTINUE386]]:
+; CHECK-NEXT:    [[TMP590:%.*]] = extractelement <64 x i1> [[TMP3]], i32 2
+; CHECK-NEXT:    br i1 [[TMP590]], label %[[PRED_STORE_IF387:.*]], label %[[PRED_STORE_CONTINUE388:.*]]
+; CHECK:       [[PRED_STORE_IF387]]:
+; CHECK-NEXT:    [[TMP591:%.*]] = add i64 [[INDEX]], 194
+; CHECK-NEXT:    [[TMP592:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP591]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP592]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE388]]
+; CHECK:       [[PRED_STORE_CONTINUE388]]:
+; CHECK-NEXT:    [[TMP593:%.*]] = extractelement <64 x i1> [[TMP3]], i32 3
+; CHECK-NEXT:    br i1 [[TMP593]], label %[[PRED_STORE_IF389:.*]], label %[[PRED_STORE_CONTINUE390:.*]]
+; CHECK:       [[PRED_STORE_IF389]]:
+; CHECK-NEXT:    [[TMP594:%.*]] = add i64 [[INDEX]], 195
+; CHECK-NEXT:    [[TMP595:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP594]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP595]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE390]]
+; CHECK:       [[PRED_STORE_CONTINUE390]]:
+; CHECK-NEXT:    [[TMP596:%.*]] = extractelement <64 x i1> [[TMP3]], i32 4
+; CHECK-NEXT:    br i1 [[TMP596]], label %[[PRED_STORE_IF391:.*]], label %[[PRED_STORE_CONTINUE392:.*]]
+; CHECK:       [[PRED_STORE_IF391]]:
+; CHECK-NEXT:    [[TMP597:%.*]] = add i64 [[INDEX]], 196
+; CHECK-NEXT:    [[TMP598:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP597]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP598]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE392]]
+; CHECK:       [[PRED_STORE_CONTINUE392]]:
+; CHECK-NEXT:    [[TMP599:%.*]] = extractelement <64 x i1> [[TMP3]], i32 5
+; CHECK-NEXT:    br i1 [[TMP599]], label %[[PRED_STORE_IF393:.*]], label %[[PRED_STORE_CONTINUE394:.*]]
+; CHECK:       [[PRED_STORE_IF393]]:
+; CHECK-NEXT:    [[TMP600:%.*]] = add i64 [[INDEX]], 197
+; CHECK-NEXT:    [[TMP601:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP600]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP601]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE394]]
+; CHECK:       [[PRED_STORE_CONTINUE394]]:
+; CHECK-NEXT:    [[TMP602:%.*]] = extractelement <64 x i1> [[TMP3]], i32 6
+; CHECK-NEXT:    br i1 [[TMP602]], label %[[PRED_STORE_IF395:.*]], label %[[PRED_STORE_CONTINUE396:.*]]
+; CHECK:       [[PRED_STORE_IF395]]:
+; CHECK-NEXT:    [[TMP603:%.*]] = add i64 [[INDEX]], 198
+; CHECK-NEXT:    [[TMP604:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP603]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP604]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE396]]
+; CHECK:       [[PRED_STORE_CONTINUE396]]:
+; CHECK-NEXT:    [[TMP605:%.*]] = extractelement <64 x i1> [[TMP3]], i32 7
+; CHECK-NEXT:    br i1 [[TMP605]], label %[[PRED_STORE_IF397:.*]], label %[[PRED_STORE_CONTINUE398:.*]]
+; CHECK:       [[PRED_STORE_IF397]]:
+; CHECK-NEXT:    [[TMP606:%.*]] = add i64 [[INDEX]], 199
+; CHECK-NEXT:    [[TMP607:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP606]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP607]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE398]]
+; CHECK:       [[PRED_STORE_CONTINUE398]]:
+; CHECK-NEXT:    [[TMP608:%.*]] = extractelement <64 x i1> [[TMP3]], i32 8
+; CHECK-NEXT:    br i1 [[TMP608]], label %[[PRED_STORE_IF399:.*]], label %[[PRED_STORE_CONTINUE400:.*]]
+; CHECK:       [[PRED_STORE_IF399]]:
+; CHECK-NEXT:    [[TMP609:%.*]] = add i64 [[INDEX]], 200
+; CHECK-NEXT:    [[TMP610:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP609]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP610]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE400]]
+; CHECK:       [[PRED_STORE_CONTINUE400]]:
+; CHECK-NEXT:    [[TMP611:%.*]] = extractelement <64 x i1> [[TMP3]], i32 9
+; CHECK-NEXT:    br i1 [[TMP611]], label %[[PRED_STORE_IF401:.*]], label %[[PRED_STORE_CONTINUE402:.*]]
+; CHECK:       [[PRED_STORE_IF401]]:
+; CHECK-NEXT:    [[TMP612:%.*]] = add i64 [[INDEX]], 201
+; CHECK-NEXT:    [[TMP613:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP612]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP613]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE402]]
+; CHECK:       [[PRED_STORE_CONTINUE402]]:
+; CHECK-NEXT:    [[TMP614:%.*]] = extractelement <64 x i1> [[TMP3]], i32 10
+; CHECK-NEXT:    br i1 [[TMP614]], label %[[PRED_STORE_IF403:.*]], label %[[PRED_STORE_CONTINUE404:.*]]
+; CHECK:       [[PRED_STORE_IF403]]:
+; CHECK-NEXT:    [[TMP615:%.*]] = add i64 [[INDEX]], 202
+; CHECK-NEXT:    [[TMP616:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP615]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP616]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE404]]
+; CHECK:       [[PRED_STORE_CONTINUE404]]:
+; CHECK-NEXT:    [[TMP617:%.*]] = extractelement <64 x i1> [[TMP3]], i32 11
+; CHECK-NEXT:    br i1 [[TMP617]], label %[[PRED_STORE_IF405:.*]], label %[[PRED_STORE_CONTINUE406:.*]]
+; CHECK:       [[PRED_STORE_IF405]]:
+; CHECK-NEXT:    [[TMP618:%.*]] = add i64 [[INDEX]], 203
+; CHECK-NEXT:    [[TMP619:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP618]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP619]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE406]]
+; CHECK:       [[PRED_STORE_CONTINUE406]]:
+; CHECK-NEXT:    [[TMP620:%.*]] = extractelement <64 x i1> [[TMP3]], i32 12
+; CHECK-NEXT:    br i1 [[TMP620]], label %[[PRED_STORE_IF407:.*]], label %[[PRED_STORE_CONTINUE408:.*]]
+; CHECK:       [[PRED_STORE_IF407]]:
+; CHECK-NEXT:    [[TMP621:%.*]] = add i64 [[INDEX]], 204
+; CHECK-NEXT:    [[TMP622:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP621]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP622]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE408]]
+; CHECK:       [[PRED_STORE_CONTINUE408]]:
+; CHECK-NEXT:    [[TMP623:%.*]] = extractelement <64 x i1> [[TMP3]], i32 13
+; CHECK-NEXT:    br i1 [[TMP623]], label %[[PRED_STORE_IF409:.*]], label %[[PRED_STORE_CONTINUE410:.*]]
+; CHECK:       [[PRED_STORE_IF409]]:
+; CHECK-NEXT:    [[TMP624:%.*]] = add i64 [[INDEX]], 205
+; CHECK-NEXT:    [[TMP625:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP624]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP625]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE410]]
+; CHECK:       [[PRED_STORE_CONTINUE410]]:
+; CHECK-NEXT:    [[TMP626:%.*]] = extractelement <64 x i1> [[TMP3]], i32 14
+; CHECK-NEXT:    br i1 [[TMP626]], label %[[PRED_STORE_IF411:.*]], label %[[PRED_STORE_CONTINUE412:.*]]
+; CHECK:       [[PRED_STORE_IF411]]:
+; CHECK-NEXT:    [[TMP627:%.*]] = add i64 [[INDEX]], 206
+; CHECK-NEXT:    [[TMP628:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP627]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP628]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE412]]
+; CHECK:       [[PRED_STORE_CONTINUE412]]:
+; CHECK-NEXT:    [[TMP629:%.*]] = extractelement <64 x i1> [[TMP3]], i32 15
+; CHECK-NEXT:    br i1 [[TMP629]], label %[[PRED_STORE_IF413:.*]], label %[[PRED_STORE_CONTINUE414:.*]]
+; CHECK:       [[PRED_STORE_IF413]]:
+; CHECK-NEXT:    [[TMP630:%.*]] = add i64 [[INDEX]], 207
+; CHECK-NEXT:    [[TMP631:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP630]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP631]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE414]]
+; CHECK:       [[PRED_STORE_CONTINUE414]]:
+; CHECK-NEXT:    [[TMP632:%.*]] = extractelement <64 x i1> [[TMP3]], i32 16
+; CHECK-NEXT:    br i1 [[TMP632]], label %[[PRED_STORE_IF415:.*]], label %[[PRED_STORE_CONTINUE416:.*]]
+; CHECK:       [[PRED_STORE_IF415]]:
+; CHECK-NEXT:    [[TMP633:%.*]] = add i64 [[INDEX]], 208
+; CHECK-NEXT:    [[TMP634:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP633]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP634]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE416]]
+; CHECK:       [[PRED_STORE_CONTINUE416]]:
+; CHECK-NEXT:    [[TMP635:%.*]] = extractelement <64 x i1> [[TMP3]], i32 17
+; CHECK-NEXT:    br i1 [[TMP635]], label %[[PRED_STORE_IF417:.*]], label %[[PRED_STORE_CONTINUE418:.*]]
+; CHECK:       [[PRED_STORE_IF417]]:
+; CHECK-NEXT:    [[TMP636:%.*]] = add i64 [[INDEX]], 209
+; CHECK-NEXT:    [[TMP637:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP636]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP637]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE418]]
+; CHECK:       [[PRED_STORE_CONTINUE418]]:
+; CHECK-NEXT:    [[TMP638:%.*]] = extractelement <64 x i1> [[TMP3]], i32 18
+; CHECK-NEXT:    br i1 [[TMP638]], label %[[PRED_STORE_IF419:.*]], label %[[PRED_STORE_CONTINUE420:.*]]
+; CHECK:       [[PRED_STORE_IF419]]:
+; CHECK-NEXT:    [[TMP639:%.*]] = add i64 [[INDEX]], 210
+; CHECK-NEXT:    [[TMP640:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP639]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP640]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE420]]
+; CHECK:       [[PRED_STORE_CONTINUE420]]:
+; CHECK-NEXT:    [[TMP641:%.*]] = extractelement <64 x i1> [[TMP3]], i32 19
+; CHECK-NEXT:    br i1 [[TMP641]], label %[[PRED_STORE_IF421:.*]], label %[[PRED_STORE_CONTINUE422:.*]]
+; CHECK:       [[PRED_STORE_IF421]]:
+; CHECK-NEXT:    [[TMP642:%.*]] = add i64 [[INDEX]], 211
+; CHECK-NEXT:    [[TMP643:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP642]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP643]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE422]]
+; CHECK:       [[PRED_STORE_CONTINUE422]]:
+; CHECK-NEXT:    [[TMP644:%.*]] = extractelement <64 x i1> [[TMP3]], i32 20
+; CHECK-NEXT:    br i1 [[TMP644]], label %[[PRED_STORE_IF423:.*]], label %[[PRED_STORE_CONTINUE424:.*]]
+; CHECK:       [[PRED_STORE_IF423]]:
+; CHECK-NEXT:    [[TMP645:%.*]] = add i64 [[INDEX]], 212
+; CHECK-NEXT:    [[TMP646:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP645]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP646]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE424]]
+; CHECK:       [[PRED_STORE_CONTINUE424]]:
+; CHECK-NEXT:    [[TMP647:%.*]] = extractelement <64 x i1> [[TMP3]], i32 21
+; CHECK-NEXT:    br i1 [[TMP647]], label %[[PRED_STORE_IF425:.*]], label %[[PRED_STORE_CONTINUE426:.*]]
+; CHECK:       [[PRED_STORE_IF425]]:
+; CHECK-NEXT:    [[TMP648:%.*]] = add i64 [[INDEX]], 213
+; CHECK-NEXT:    [[TMP649:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP648]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP649]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE426]]
+; CHECK:       [[PRED_STORE_CONTINUE426]]:
+; CHECK-NEXT:    [[TMP650:%.*]] = extractelement <64 x i1> [[TMP3]], i32 22
+; CHECK-NEXT:    br i1 [[TMP650]], label %[[PRED_STORE_IF427:.*]], label %[[PRED_STORE_CONTINUE428:.*]]
+; CHECK:       [[PRED_STORE_IF427]]:
+; CHECK-NEXT:    [[TMP651:%.*]] = add i64 [[INDEX]], 214
+; CHECK-NEXT:    [[TMP652:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP651]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP652]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE428]]
+; CHECK:       [[PRED_STORE_CONTINUE428]]:
+; CHECK-NEXT:    [[TMP653:%.*]] = extractelement <64 x i1> [[TMP3]], i32 23
+; CHECK-NEXT:    br i1 [[TMP653]], label %[[PRED_STORE_IF429:.*]], label %[[PRED_STORE_CONTINUE430:.*]]
+; CHECK:       [[PRED_STORE_IF429]]:
+; CHECK-NEXT:    [[TMP654:%.*]] = add i64 [[INDEX]], 215
+; CHECK-NEXT:    [[TMP655:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP654]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP655]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE430]]
+; CHECK:       [[PRED_STORE_CONTINUE430]]:
+; CHECK-NEXT:    [[TMP656:%.*]] = extractelement <64 x i1> [[TMP3]], i32 24
+; CHECK-NEXT:    br i1 [[TMP656]], label %[[PRED_STORE_IF431:.*]], label %[[PRED_STORE_CONTINUE432:.*]]
+; CHECK:       [[PRED_STORE_IF431]]:
+; CHECK-NEXT:    [[TMP657:%.*]] = add i64 [[INDEX]], 216
+; CHECK-NEXT:    [[TMP658:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP657]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP658]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE432]]
+; CHECK:       [[PRED_STORE_CONTINUE432]]:
+; CHECK-NEXT:    [[TMP659:%.*]] = extractelement <64 x i1> [[TMP3]], i32 25
+; CHECK-NEXT:    br i1 [[TMP659]], label %[[PRED_STORE_IF433:.*]], label %[[PRED_STORE_CONTINUE434:.*]]
+; CHECK:       [[PRED_STORE_IF433]]:
+; CHECK-NEXT:    [[TMP660:%.*]] = add i64 [[INDEX]], 217
+; CHECK-NEXT:    [[TMP661:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP660]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP661]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE434]]
+; CHECK:       [[PRED_STORE_CONTINUE434]]:
+; CHECK-NEXT:    [[TMP662:%.*]] = extractelement <64 x i1> [[TMP3]], i32 26
+; CHECK-NEXT:    br i1 [[TMP662]], label %[[PRED_STORE_IF435:.*]], label %[[PRED_STORE_CONTINUE436:.*]]
+; CHECK:       [[PRED_STORE_IF435]]:
+; CHECK-NEXT:    [[TMP663:%.*]] = add i64 [[INDEX]], 218
+; CHECK-NEXT:    [[TMP664:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP663]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP664]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE436]]
+; CHECK:       [[PRED_STORE_CONTINUE436]]:
+; CHECK-NEXT:    [[TMP665:%.*]] = extractelement <64 x i1> [[TMP3]], i32 27
+; CHECK-NEXT:    br i1 [[TMP665]], label %[[PRED_STORE_IF437:.*]], label %[[PRED_STORE_CONTINUE438:.*]]
+; CHECK:       [[PRED_STORE_IF437]]:
+; CHECK-NEXT:    [[TMP666:%.*]] = add i64 [[INDEX]], 219
+; CHECK-NEXT:    [[TMP667:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP666]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP667]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE438]]
+; CHECK:       [[PRED_STORE_CONTINUE438]]:
+; CHECK-NEXT:    [[TMP668:%.*]] = extractelement <64 x i1> [[TMP3]], i32 28
+; CHECK-NEXT:    br i1 [[TMP668]], label %[[PRED_STORE_IF439:.*]], label %[[PRED_STORE_CONTINUE440:.*]]
+; CHECK:       [[PRED_STORE_IF439]]:
+; CHECK-NEXT:    [[TMP669:%.*]] = add i64 [[INDEX]], 220
+; CHECK-NEXT:    [[TMP670:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP669]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP670]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE440]]
+; CHECK:       [[PRED_STORE_CONTINUE440]]:
+; CHECK-NEXT:    [[TMP671:%.*]] = extractelement <64 x i1> [[TMP3]], i32 29
+; CHECK-NEXT:    br i1 [[TMP671]], label %[[PRED_STORE_IF441:.*]], label %[[PRED_STORE_CONTINUE442:.*]]
+; CHECK:       [[PRED_STORE_IF441]]:
+; CHECK-NEXT:    [[TMP672:%.*]] = add i64 [[INDEX]], 221
+; CHECK-NEXT:    [[TMP673:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP672]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP673]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE442]]
+; CHECK:       [[PRED_STORE_CONTINUE442]]:
+; CHECK-NEXT:    [[TMP674:%.*]] = extractelement <64 x i1> [[TMP3]], i32 30
+; CHECK-NEXT:    br i1 [[TMP674]], label %[[PRED_STORE_IF443:.*]], label %[[PRED_STORE_CONTINUE444:.*]]
+; CHECK:       [[PRED_STORE_IF443]]:
+; CHECK-NEXT:    [[TMP675:%.*]] = add i64 [[INDEX]], 222
+; CHECK-NEXT:    [[TMP676:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP675]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP676]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE444]]
+; CHECK:       [[PRED_STORE_CONTINUE444]]:
+; CHECK-NEXT:    [[TMP677:%.*]] = extractelement <64 x i1> [[TMP3]], i32 31
+; CHECK-NEXT:    br i1 [[TMP677]], label %[[PRED_STORE_IF445:.*]], label %[[PRED_STORE_CONTINUE446:.*]]
+; CHECK:       [[PRED_STORE_IF445]]:
+; CHECK-NEXT:    [[TMP678:%.*]] = add i64 [[INDEX]], 223
+; CHECK-NEXT:    [[TMP679:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP678]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP679]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE446]]
+; CHECK:       [[PRED_STORE_CONTINUE446]]:
+; CHECK-NEXT:    [[TMP680:%.*]] = extractelement <64 x i1> [[TMP3]], i32 32
+; CHECK-NEXT:    br i1 [[TMP680]], label %[[PRED_STORE_IF447:.*]], label %[[PRED_STORE_CONTINUE448:.*]]
+; CHECK:       [[PRED_STORE_IF447]]:
+; CHECK-NEXT:    [[TMP681:%.*]] = add i64 [[INDEX]], 224
+; CHECK-NEXT:    [[TMP682:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP681]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP682]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE448]]
+; CHECK:       [[PRED_STORE_CONTINUE448]]:
+; CHECK-NEXT:    [[TMP683:%.*]] = extractelement <64 x i1> [[TMP3]], i32 33
+; CHECK-NEXT:    br i1 [[TMP683]], label %[[PRED_STORE_IF449:.*]], label %[[PRED_STORE_CONTINUE450:.*]]
+; CHECK:       [[PRED_STORE_IF449]]:
+; CHECK-NEXT:    [[TMP684:%.*]] = add i64 [[INDEX]], 225
+; CHECK-NEXT:    [[TMP685:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP684]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP685]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE450]]
+; CHECK:       [[PRED_STORE_CONTINUE450]]:
+; CHECK-NEXT:    [[TMP686:%.*]] = extractelement <64 x i1> [[TMP3]], i32 34
+; CHECK-NEXT:    br i1 [[TMP686]], label %[[PRED_STORE_IF451:.*]], label %[[PRED_STORE_CONTINUE452:.*]]
+; CHECK:       [[PRED_STORE_IF451]]:
+; CHECK-NEXT:    [[TMP687:%.*]] = add i64 [[INDEX]], 226
+; CHECK-NEXT:    [[TMP688:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP687]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP688]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE452]]
+; CHECK:       [[PRED_STORE_CONTINUE452]]:
+; CHECK-NEXT:    [[TMP689:%.*]] = extractelement <64 x i1> [[TMP3]], i32 35
+; CHECK-NEXT:    br i1 [[TMP689]], label %[[PRED_STORE_IF453:.*]], label %[[PRED_STORE_CONTINUE454:.*]]
+; CHECK:       [[PRED_STORE_IF453]]:
+; CHECK-NEXT:    [[TMP690:%.*]] = add i64 [[INDEX]], 227
+; CHECK-NEXT:    [[TMP691:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP690]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP691]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE454]]
+; CHECK:       [[PRED_STORE_CONTINUE454]]:
+; CHECK-NEXT:    [[TMP692:%.*]] = extractelement <64 x i1> [[TMP3]], i32 36
+; CHECK-NEXT:    br i1 [[TMP692]], label %[[PRED_STORE_IF455:.*]], label %[[PRED_STORE_CONTINUE456:.*]]
+; CHECK:       [[PRED_STORE_IF455]]:
+; CHECK-NEXT:    [[TMP693:%.*]] = add i64 [[INDEX]], 228
+; CHECK-NEXT:    [[TMP694:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP693]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP694]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE456]]
+; CHECK:       [[PRED_STORE_CONTINUE456]]:
+; CHECK-NEXT:    [[TMP695:%.*]] = extractelement <64 x i1> [[TMP3]], i32 37
+; CHECK-NEXT:    br i1 [[TMP695]], label %[[PRED_STORE_IF457:.*]], label %[[PRED_STORE_CONTINUE458:.*]]
+; CHECK:       [[PRED_STORE_IF457]]:
+; CHECK-NEXT:    [[TMP696:%.*]] = add i64 [[INDEX]], 229
+; CHECK-NEXT:    [[TMP697:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP696]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP697]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE458]]
+; CHECK:       [[PRED_STORE_CONTINUE458]]:
+; CHECK-NEXT:    [[TMP698:%.*]] = extractelement <64 x i1> [[TMP3]], i32 38
+; CHECK-NEXT:    br i1 [[TMP698]], label %[[PRED_STORE_IF459:.*]], label %[[PRED_STORE_CONTINUE460:.*]]
+; CHECK:       [[PRED_STORE_IF459]]:
+; CHECK-NEXT:    [[TMP699:%.*]] = add i64 [[INDEX]], 230
+; CHECK-NEXT:    [[TMP700:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP699]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP700]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE460]]
+; CHECK:       [[PRED_STORE_CONTINUE460]]:
+; CHECK-NEXT:    [[TMP701:%.*]] = extractelement <64 x i1> [[TMP3]], i32 39
+; CHECK-NEXT:    br i1 [[TMP701]], label %[[PRED_STORE_IF461:.*]], label %[[PRED_STORE_CONTINUE462:.*]]
+; CHECK:       [[PRED_STORE_IF461]]:
+; CHECK-NEXT:    [[TMP702:%.*]] = add i64 [[INDEX]], 231
+; CHECK-NEXT:    [[TMP703:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP702]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP703]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE462]]
+; CHECK:       [[PRED_STORE_CONTINUE462]]:
+; CHECK-NEXT:    [[TMP704:%.*]] = extractelement <64 x i1> [[TMP3]], i32 40
+; CHECK-NEXT:    br i1 [[TMP704]], label %[[PRED_STORE_IF463:.*]], label %[[PRED_STORE_CONTINUE464:.*]]
+; CHECK:       [[PRED_STORE_IF463]]:
+; CHECK-NEXT:    [[TMP705:%.*]] = add i64 [[INDEX]], 232
+; CHECK-NEXT:    [[TMP706:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP705]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP706]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE464]]
+; CHECK:       [[PRED_STORE_CONTINUE464]]:
+; CHECK-NEXT:    [[TMP707:%.*]] = extractelement <64 x i1> [[TMP3]], i32 41
+; CHECK-NEXT:    br i1 [[TMP707]], label %[[PRED_STORE_IF465:.*]], label %[[PRED_STORE_CONTINUE466:.*]]
+; CHECK:       [[PRED_STORE_IF465]]:
+; CHECK-NEXT:    [[TMP708:%.*]] = add i64 [[INDEX]], 233
+; CHECK-NEXT:    [[TMP709:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP708]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP709]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE466]]
+; CHECK:       [[PRED_STORE_CONTINUE466]]:
+; CHECK-NEXT:    [[TMP710:%.*]] = extractelement <64 x i1> [[TMP3]], i32 42
+; CHECK-NEXT:    br i1 [[TMP710]], label %[[PRED_STORE_IF467:.*]], label %[[PRED_STORE_CONTINUE468:.*]]
+; CHECK:       [[PRED_STORE_IF467]]:
+; CHECK-NEXT:    [[TMP711:%.*]] = add i64 [[INDEX]], 234
+; CHECK-NEXT:    [[TMP712:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP711]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP712]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE468]]
+; CHECK:       [[PRED_STORE_CONTINUE468]]:
+; CHECK-NEXT:    [[TMP713:%.*]] = extractelement <64 x i1> [[TMP3]], i32 43
+; CHECK-NEXT:    br i1 [[TMP713]], label %[[PRED_STORE_IF469:.*]], label %[[PRED_STORE_CONTINUE470:.*]]
+; CHECK:       [[PRED_STORE_IF469]]:
+; CHECK-NEXT:    [[TMP714:%.*]] = add i64 [[INDEX]], 235
+; CHECK-NEXT:    [[TMP715:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP714]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP715]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE470]]
+; CHECK:       [[PRED_STORE_CONTINUE470]]:
+; CHECK-NEXT:    [[TMP716:%.*]] = extractelement <64 x i1> [[TMP3]], i32 44
+; CHECK-NEXT:    br i1 [[TMP716]], label %[[PRED_STORE_IF471:.*]], label %[[PRED_STORE_CONTINUE472:.*]]
+; CHECK:       [[PRED_STORE_IF471]]:
+; CHECK-NEXT:    [[TMP717:%.*]] = add i64 [[INDEX]], 236
+; CHECK-NEXT:    [[TMP718:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP717]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP718]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE472]]
+; CHECK:       [[PRED_STORE_CONTINUE472]]:
+; CHECK-NEXT:    [[TMP719:%.*]] = extractelement <64 x i1> [[TMP3]], i32 45
+; CHECK-NEXT:    br i1 [[TMP719]], label %[[PRED_STORE_IF473:.*]], label %[[PRED_STORE_CONTINUE474:.*]]
+; CHECK:       [[PRED_STORE_IF473]]:
+; CHECK-NEXT:    [[TMP720:%.*]] = add i64 [[INDEX]], 237
+; CHECK-NEXT:    [[TMP721:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP720]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP721]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE474]]
+; CHECK:       [[PRED_STORE_CONTINUE474]]:
+; CHECK-NEXT:    [[TMP722:%.*]] = extractelement <64 x i1> [[TMP3]], i32 46
+; CHECK-NEXT:    br i1 [[TMP722]], label %[[PRED_STORE_IF475:.*]], label %[[PRED_STORE_CONTINUE476:.*]]
+; CHECK:       [[PRED_STORE_IF475]]:
+; CHECK-NEXT:    [[TMP723:%.*]] = add i64 [[INDEX]], 238
+; CHECK-NEXT:    [[TMP724:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP723]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP724]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE476]]
+; CHECK:       [[PRED_STORE_CONTINUE476]]:
+; CHECK-NEXT:    [[TMP725:%.*]] = extractelement <64 x i1> [[TMP3]], i32 47
+; CHECK-NEXT:    br i1 [[TMP725]], label %[[PRED_STORE_IF477:.*]], label %[[PRED_STORE_CONTINUE478:.*]]
+; CHECK:       [[PRED_STORE_IF477]]:
+; CHECK-NEXT:    [[TMP726:%.*]] = add i64 [[INDEX]], 239
+; CHECK-NEXT:    [[TMP727:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP726]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP727]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE478]]
+; CHECK:       [[PRED_STORE_CONTINUE478]]:
+; CHECK-NEXT:    [[TMP728:%.*]] = extractelement <64 x i1> [[TMP3]], i32 48
+; CHECK-NEXT:    br i1 [[TMP728]], label %[[PRED_STORE_IF479:.*]], label %[[PRED_STORE_CONTINUE480:.*]]
+; CHECK:       [[PRED_STORE_IF479]]:
+; CHECK-NEXT:    [[TMP729:%.*]] = add i64 [[INDEX]], 240
+; CHECK-NEXT:    [[TMP730:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP729]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP730]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE480]]
+; CHECK:       [[PRED_STORE_CONTINUE480]]:
+; CHECK-NEXT:    [[TMP731:%.*]] = extractelement <64 x i1> [[TMP3]], i32 49
+; CHECK-NEXT:    br i1 [[TMP731]], label %[[PRED_STORE_IF481:.*]], label %[[PRED_STORE_CONTINUE482:.*]]
+; CHECK:       [[PRED_STORE_IF481]]:
+; CHECK-NEXT:    [[TMP732:%.*]] = add i64 [[INDEX]], 241
+; CHECK-NEXT:    [[TMP733:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP732]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP733]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE482]]
+; CHECK:       [[PRED_STORE_CONTINUE482]]:
+; CHECK-NEXT:    [[TMP734:%.*]] = extractelement <64 x i1> [[TMP3]], i32 50
+; CHECK-NEXT:    br i1 [[TMP734]], label %[[PRED_STORE_IF483:.*]], label %[[PRED_STORE_CONTINUE484:.*]]
+; CHECK:       [[PRED_STORE_IF483]]:
+; CHECK-NEXT:    [[TMP735:%.*]] = add i64 [[INDEX]], 242
+; CHECK-NEXT:    [[TMP736:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP735]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP736]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE484]]
+; CHECK:       [[PRED_STORE_CONTINUE484]]:
+; CHECK-NEXT:    [[TMP737:%.*]] = extractelement <64 x i1> [[TMP3]], i32 51
+; CHECK-NEXT:    br i1 [[TMP737]], label %[[PRED_STORE_IF485:.*]], label %[[PRED_STORE_CONTINUE486:.*]]
+; CHECK:       [[PRED_STORE_IF485]]:
+; CHECK-NEXT:    [[TMP738:%.*]] = add i64 [[INDEX]], 243
+; CHECK-NEXT:    [[TMP739:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP738]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP739]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE486]]
+; CHECK:       [[PRED_STORE_CONTINUE486]]:
+; CHECK-NEXT:    [[TMP740:%.*]] = extractelement <64 x i1> [[TMP3]], i32 52
+; CHECK-NEXT:    br i1 [[TMP740]], label %[[PRED_STORE_IF487:.*]], label %[[PRED_STORE_CONTINUE488:.*]]
+; CHECK:       [[PRED_STORE_IF487]]:
+; CHECK-NEXT:    [[TMP741:%.*]] = add i64 [[INDEX]], 244
+; CHECK-NEXT:    [[TMP742:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP741]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP742]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE488]]
+; CHECK:       [[PRED_STORE_CONTINUE488]]:
+; CHECK-NEXT:    [[TMP743:%.*]] = extractelement <64 x i1> [[TMP3]], i32 53
+; CHECK-NEXT:    br i1 [[TMP743]], label %[[PRED_STORE_IF489:.*]], label %[[PRED_STORE_CONTINUE490:.*]]
+; CHECK:       [[PRED_STORE_IF489]]:
+; CHECK-NEXT:    [[TMP744:%.*]] = add i64 [[INDEX]], 245
+; CHECK-NEXT:    [[TMP745:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP744]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP745]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE490]]
+; CHECK:       [[PRED_STORE_CONTINUE490]]:
+; CHECK-NEXT:    [[TMP746:%.*]] = extractelement <64 x i1> [[TMP3]], i32 54
+; CHECK-NEXT:    br i1 [[TMP746]], label %[[PRED_STORE_IF491:.*]], label %[[PRED_STORE_CONTINUE492:.*]]
+; CHECK:       [[PRED_STORE_IF491]]:
+; CHECK-NEXT:    [[TMP747:%.*]] = add i64 [[INDEX]], 246
+; CHECK-NEXT:    [[TMP748:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP747]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP748]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE492]]
+; CHECK:       [[PRED_STORE_CONTINUE492]]:
+; CHECK-NEXT:    [[TMP749:%.*]] = extractelement <64 x i1> [[TMP3]], i32 55
+; CHECK-NEXT:    br i1 [[TMP749]], label %[[PRED_STORE_IF493:.*]], label %[[PRED_STORE_CONTINUE494:.*]]
+; CHECK:       [[PRED_STORE_IF493]]:
+; CHECK-NEXT:    [[TMP750:%.*]] = add i64 [[INDEX]], 247
+; CHECK-NEXT:    [[TMP751:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP750]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP751]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE494]]
+; CHECK:       [[PRED_STORE_CONTINUE494]]:
+; CHECK-NEXT:    [[TMP752:%.*]] = extractelement <64 x i1> [[TMP3]], i32 56
+; CHECK-NEXT:    br i1 [[TMP752]], label %[[PRED_STORE_IF495:.*]], label %[[PRED_STORE_CONTINUE496:.*]]
+; CHECK:       [[PRED_STORE_IF495]]:
+; CHECK-NEXT:    [[TMP753:%.*]] = add i64 [[INDEX]], 248
+; CHECK-NEXT:    [[TMP754:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP753]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP754]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE496]]
+; CHECK:       [[PRED_STORE_CONTINUE496]]:
+; CHECK-NEXT:    [[TMP755:%.*]] = extractelement <64 x i1> [[TMP3]], i32 57
+; CHECK-NEXT:    br i1 [[TMP755]], label %[[PRED_STORE_IF497:.*]], label %[[PRED_STORE_CONTINUE498:.*]]
+; CHECK:       [[PRED_STORE_IF497]]:
+; CHECK-NEXT:    [[TMP756:%.*]] = add i64 [[INDEX]], 249
+; CHECK-NEXT:    [[TMP757:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP756]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP757]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE498]]
+; CHECK:       [[PRED_STORE_CONTINUE498]]:
+; CHECK-NEXT:    [[TMP758:%.*]] = extractelement <64 x i1> [[TMP3]], i32 58
+; CHECK-NEXT:    br i1 [[TMP758]], label %[[PRED_STORE_IF499:.*]], label %[[PRED_STORE_CONTINUE500:.*]]
+; CHECK:       [[PRED_STORE_IF499]]:
+; CHECK-NEXT:    [[TMP759:%.*]] = add i64 [[INDEX]], 250
+; CHECK-NEXT:    [[TMP760:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP759]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP760]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE500]]
+; CHECK:       [[PRED_STORE_CONTINUE500]]:
+; CHECK-NEXT:    [[TMP761:%.*]] = extractelement <64 x i1> [[TMP3]], i32 59
+; CHECK-NEXT:    br i1 [[TMP761]], label %[[PRED_STORE_IF501:.*]], label %[[PRED_STORE_CONTINUE502:.*]]
+; CHECK:       [[PRED_STORE_IF501]]:
+; CHECK-NEXT:    [[TMP762:%.*]] = add i64 [[INDEX]], 251
+; CHECK-NEXT:    [[TMP763:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP762]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP763]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE502]]
+; CHECK:       [[PRED_STORE_CONTINUE502]]:
+; CHECK-NEXT:    [[TMP764:%.*]] = extractelement <64 x i1> [[TMP3]], i32 60
+; CHECK-NEXT:    br i1 [[TMP764]], label %[[PRED_STORE_IF503:.*]], label %[[PRED_STORE_CONTINUE504:.*]]
+; CHECK:       [[PRED_STORE_IF503]]:
+; CHECK-NEXT:    [[TMP765:%.*]] = add i64 [[INDEX]], 252
+; CHECK-NEXT:    [[TMP766:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP765]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP766]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE504]]
+; CHECK:       [[PRED_STORE_CONTINUE504]]:
+; CHECK-NEXT:    [[TMP767:%.*]] = extractelement <64 x i1> [[TMP3]], i32 61
+; CHECK-NEXT:    br i1 [[TMP767]], label %[[PRED_STORE_IF505:.*]], label %[[PRED_STORE_CONTINUE506:.*]]
+; CHECK:       [[PRED_STORE_IF505]]:
+; CHECK-NEXT:    [[TMP768:%.*]] = add i64 [[INDEX]], 253
+; CHECK-NEXT:    [[TMP769:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP768]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP769]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE506]]
+; CHECK:       [[PRED_STORE_CONTINUE506]]:
+; CHECK-NEXT:    [[TMP770:%.*]] = extractelement <64 x i1> [[TMP3]], i32 62
+; CHECK-NEXT:    br i1 [[TMP770]], label %[[PRED_STORE_IF507:.*]], label %[[PRED_STORE_CONTINUE508:.*]]
+; CHECK:       [[PRED_STORE_IF507]]:
+; CHECK-NEXT:    [[TMP771:%.*]] = add i64 [[INDEX]], 254
+; CHECK-NEXT:    [[TMP772:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP771]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP772]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE508]]
+; CHECK:       [[PRED_STORE_CONTINUE508]]:
+; CHECK-NEXT:    [[TMP773:%.*]] = extractelement <64 x i1> [[TMP3]], i32 63
+; CHECK-NEXT:    br i1 [[TMP773]], label %[[PRED_STORE_IF509:.*]], label %[[PRED_STORE_CONTINUE510:.*]]
+; CHECK:       [[PRED_STORE_IF509]]:
+; CHECK-NEXT:    [[TMP774:%.*]] = add i64 [[INDEX]], 255
+; CHECK-NEXT:    [[TMP775:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP774]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP775]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE510]]
+; CHECK:       [[PRED_STORE_CONTINUE510]]:
+; CHECK-NEXT:    [[TMP776:%.*]] = extractelement <64 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP776]], label %[[PRED_STORE_IF511:.*]], label %[[PRED_STORE_CONTINUE512:.*]]
+; CHECK:       [[PRED_STORE_IF511]]:
+; CHECK-NEXT:    [[TMP777:%.*]] = add i64 [[INDEX]], 256
+; CHECK-NEXT:    [[TMP778:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP777]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP778]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE512]]
+; CHECK:       [[PRED_STORE_CONTINUE512]]:
+; CHECK-NEXT:    [[TMP779:%.*]] = extractelement <64 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP779]], label %[[PRED_STORE_IF513:.*]], label %[[PRED_STORE_CONTINUE514:.*]]
+; CHECK:       [[PRED_STORE_IF513]]:
+; CHECK-NEXT:    [[TMP780:%.*]] = add i64 [[INDEX]], 257
+; CHECK-NEXT:    [[TMP781:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP780]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP781]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE514]]
+; CHECK:       [[PRED_STORE_CONTINUE514]]:
+; CHECK-NEXT:    [[TMP782:%.*]] = extractelement <64 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP782]], label %[[PRED_STORE_IF515:.*]], label %[[PRED_STORE_CONTINUE516:.*]]
+; CHECK:       [[PRED_STORE_IF515]]:
+; CHECK-NEXT:    [[TMP783:%.*]] = add i64 [[INDEX]], 258
+; CHECK-NEXT:    [[TMP784:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP783]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP784]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE516]]
+; CHECK:       [[PRED_STORE_CONTINUE516]]:
+; CHECK-NEXT:    [[TMP785:%.*]] = extractelement <64 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP785]], label %[[PRED_STORE_IF517:.*]], label %[[PRED_STORE_CONTINUE518:.*]]
+; CHECK:       [[PRED_STORE_IF517]]:
+; CHECK-NEXT:    [[TMP786:%.*]] = add i64 [[INDEX]], 259
+; CHECK-NEXT:    [[TMP787:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP786]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP787]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE518]]
+; CHECK:       [[PRED_STORE_CONTINUE518]]:
+; CHECK-NEXT:    [[TMP788:%.*]] = extractelement <64 x i1> [[TMP4]], i32 4
+; CHECK-NEXT:    br i1 [[TMP788]], label %[[PRED_STORE_IF519:.*]], label %[[PRED_STORE_CONTINUE520:.*]]
+; CHECK:       [[PRED_STORE_IF519]]:
+; CHECK-NEXT:    [[TMP789:%.*]] = add i64 [[INDEX]], 260
+; CHECK-NEXT:    [[TMP790:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP789]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP790]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE520]]
+; CHECK:       [[PRED_STORE_CONTINUE520]]:
+; CHECK-NEXT:    [[TMP791:%.*]] = extractelement <64 x i1> [[TMP4]], i32 5
+; CHECK-NEXT:    br i1 [[TMP791]], label %[[PRED_STORE_IF521:.*]], label %[[PRED_STORE_CONTINUE522:.*]]
+; CHECK:       [[PRED_STORE_IF521]]:
+; CHECK-NEXT:    [[TMP792:%.*]] = add i64 [[INDEX]], 261
+; CHECK-NEXT:    [[TMP793:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP792]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP793]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE522]]
+; CHECK:       [[PRED_STORE_CONTINUE522]]:
+; CHECK-NEXT:    [[TMP794:%.*]] = extractelement <64 x i1> [[TMP4]], i32 6
+; CHECK-NEXT:    br i1 [[TMP794]], label %[[PRED_STORE_IF523:.*]], label %[[PRED_STORE_CONTINUE524:.*]]
+; CHECK:       [[PRED_STORE_IF523]]:
+; CHECK-NEXT:    [[TMP795:%.*]] = add i64 [[INDEX]], 262
+; CHECK-NEXT:    [[TMP796:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP795]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP796]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE524]]
+; CHECK:       [[PRED_STORE_CONTINUE524]]:
+; CHECK-NEXT:    [[TMP797:%.*]] = extractelement <64 x i1> [[TMP4]], i32 7
+; CHECK-NEXT:    br i1 [[TMP797]], label %[[PRED_STORE_IF525:.*]], label %[[PRED_STORE_CONTINUE526:.*]]
+; CHECK:       [[PRED_STORE_IF525]]:
+; CHECK-NEXT:    [[TMP798:%.*]] = add i64 [[INDEX]], 263
+; CHECK-NEXT:    [[TMP799:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP798]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP799]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE526]]
+; CHECK:       [[PRED_STORE_CONTINUE526]]:
+; CHECK-NEXT:    [[TMP800:%.*]] = extractelement <64 x i1> [[TMP4]], i32 8
+; CHECK-NEXT:    br i1 [[TMP800]], label %[[PRED_STORE_IF527:.*]], label %[[PRED_STORE_CONTINUE528:.*]]
+; CHECK:       [[PRED_STORE_IF527]]:
+; CHECK-NEXT:    [[TMP801:%.*]] = add i64 [[INDEX]], 264
+; CHECK-NEXT:    [[TMP802:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP801]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP802]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE528]]
+; CHECK:       [[PRED_STORE_CONTINUE528]]:
+; CHECK-NEXT:    [[TMP803:%.*]] = extractelement <64 x i1> [[TMP4]], i32 9
+; CHECK-NEXT:    br i1 [[TMP803]], label %[[PRED_STORE_IF529:.*]], label %[[PRED_STORE_CONTINUE530:.*]]
+; CHECK:       [[PRED_STORE_IF529]]:
+; CHECK-NEXT:    [[TMP804:%.*]] = add i64 [[INDEX]], 265
+; CHECK-NEXT:    [[TMP805:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP804]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP805]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE530]]
+; CHECK:       [[PRED_STORE_CONTINUE530]]:
+; CHECK-NEXT:    [[TMP806:%.*]] = extractelement <64 x i1> [[TMP4]], i32 10
+; CHECK-NEXT:    br i1 [[TMP806]], label %[[PRED_STORE_IF531:.*]], label %[[PRED_STORE_CONTINUE532:.*]]
+; CHECK:       [[PRED_STORE_IF531]]:
+; CHECK-NEXT:    [[TMP807:%.*]] = add i64 [[INDEX]], 266
+; CHECK-NEXT:    [[TMP808:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP807]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP808]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE532]]
+; CHECK:       [[PRED_STORE_CONTINUE532]]:
+; CHECK-NEXT:    [[TMP809:%.*]] = extractelement <64 x i1> [[TMP4]], i32 11
+; CHECK-NEXT:    br i1 [[TMP809]], label %[[PRED_STORE_IF533:.*]], label %[[PRED_STORE_CONTINUE534:.*]]
+; CHECK:       [[PRED_STORE_IF533]]:
+; CHECK-NEXT:    [[TMP810:%.*]] = add i64 [[INDEX]], 267
+; CHECK-NEXT:    [[TMP811:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP810]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP811]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE534]]
+; CHECK:       [[PRED_STORE_CONTINUE534]]:
+; CHECK-NEXT:    [[TMP812:%.*]] = extractelement <64 x i1> [[TMP4]], i32 12
+; CHECK-NEXT:    br i1 [[TMP812]], label %[[PRED_STORE_IF535:.*]], label %[[PRED_STORE_CONTINUE536:.*]]
+; CHECK:       [[PRED_STORE_IF535]]:
+; CHECK-NEXT:    [[TMP813:%.*]] = add i64 [[INDEX]], 268
+; CHECK-NEXT:    [[TMP814:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP813]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP814]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE536]]
+; CHECK:       [[PRED_STORE_CONTINUE536]]:
+; CHECK-NEXT:    [[TMP815:%.*]] = extractelement <64 x i1> [[TMP4]], i32 13
+; CHECK-NEXT:    br i1 [[TMP815]], label %[[PRED_STORE_IF537:.*]], label %[[PRED_STORE_CONTINUE538:.*]]
+; CHECK:       [[PRED_STORE_IF537]]:
+; CHECK-NEXT:    [[TMP816:%.*]] = add i64 [[INDEX]], 269
+; CHECK-NEXT:    [[TMP817:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP816]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP817]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE538]]
+; CHECK:       [[PRED_STORE_CONTINUE538]]:
+; CHECK-NEXT:    [[TMP818:%.*]] = extractelement <64 x i1> [[TMP4]], i32 14
+; CHECK-NEXT:    br i1 [[TMP818]], label %[[PRED_STORE_IF539:.*]], label %[[PRED_STORE_CONTINUE540:.*]]
+; CHECK:       [[PRED_STORE_IF539]]:
+; CHECK-NEXT:    [[TMP819:%.*]] = add i64 [[INDEX]], 270
+; CHECK-NEXT:    [[TMP820:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP819]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP820]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE540]]
+; CHECK:       [[PRED_STORE_CONTINUE540]]:
+; CHECK-NEXT:    [[TMP821:%.*]] = extractelement <64 x i1> [[TMP4]], i32 15
+; CHECK-NEXT:    br i1 [[TMP821]], label %[[PRED_STORE_IF541:.*]], label %[[PRED_STORE_CONTINUE542:.*]]
+; CHECK:       [[PRED_STORE_IF541]]:
+; CHECK-NEXT:    [[TMP822:%.*]] = add i64 [[INDEX]], 271
+; CHECK-NEXT:    [[TMP823:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP822]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP823]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE542]]
+; CHECK:       [[PRED_STORE_CONTINUE542]]:
+; CHECK-NEXT:    [[TMP824:%.*]] = extractelement <64 x i1> [[TMP4]], i32 16
+; CHECK-NEXT:    br i1 [[TMP824]], label %[[PRED_STORE_IF543:.*]], label %[[PRED_STORE_CONTINUE544:.*]]
+; CHECK:       [[PRED_STORE_IF543]]:
+; CHECK-NEXT:    [[TMP825:%.*]] = add i64 [[INDEX]], 272
+; CHECK-NEXT:    [[TMP826:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP825]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP826]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE544]]
+; CHECK:       [[PRED_STORE_CONTINUE544]]:
+; CHECK-NEXT:    [[TMP827:%.*]] = extractelement <64 x i1> [[TMP4]], i32 17
+; CHECK-NEXT:    br i1 [[TMP827]], label %[[PRED_STORE_IF545:.*]], label %[[PRED_STORE_CONTINUE546:.*]]
+; CHECK:       [[PRED_STORE_IF545]]:
+; CHECK-NEXT:    [[TMP828:%.*]] = add i64 [[INDEX]], 273
+; CHECK-NEXT:    [[TMP829:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP828]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP829]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE546]]
+; CHECK:       [[PRED_STORE_CONTINUE546]]:
+; CHECK-NEXT:    [[TMP830:%.*]] = extractelement <64 x i1> [[TMP4]], i32 18
+; CHECK-NEXT:    br i1 [[TMP830]], label %[[PRED_STORE_IF547:.*]], label %[[PRED_STORE_CONTINUE548:.*]]
+; CHECK:       [[PRED_STORE_IF547]]:
+; CHECK-NEXT:    [[TMP831:%.*]] = add i64 [[INDEX]], 274
+; CHECK-NEXT:    [[TMP832:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP831]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP832]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE548]]
+; CHECK:       [[PRED_STORE_CONTINUE548]]:
+; CHECK-NEXT:    [[TMP833:%.*]] = extractelement <64 x i1> [[TMP4]], i32 19
+; CHECK-NEXT:    br i1 [[TMP833]], label %[[PRED_STORE_IF549:.*]], label %[[PRED_STORE_CONTINUE550:.*]]
+; CHECK:       [[PRED_STORE_IF549]]:
+; CHECK-NEXT:    [[TMP834:%.*]] = add i64 [[INDEX]], 275
+; CHECK-NEXT:    [[TMP835:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP834]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP835]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE550]]
+; CHECK:       [[PRED_STORE_CONTINUE550]]:
+; CHECK-NEXT:    [[TMP836:%.*]] = extractelement <64 x i1> [[TMP4]], i32 20
+; CHECK-NEXT:    br i1 [[TMP836]], label %[[PRED_STORE_IF551:.*]], label %[[PRED_STORE_CONTINUE552:.*]]
+; CHECK:       [[PRED_STORE_IF551]]:
+; CHECK-NEXT:    [[TMP837:%.*]] = add i64 [[INDEX]], 276
+; CHECK-NEXT:    [[TMP838:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP837]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP838]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE552]]
+; CHECK:       [[PRED_STORE_CONTINUE552]]:
+; CHECK-NEXT:    [[TMP839:%.*]] = extractelement <64 x i1> [[TMP4]], i32 21
+; CHECK-NEXT:    br i1 [[TMP839]], label %[[PRED_STORE_IF553:.*]], label %[[PRED_STORE_CONTINUE554:.*]]
+; CHECK:       [[PRED_STORE_IF553]]:
+; CHECK-NEXT:    [[TMP840:%.*]] = add i64 [[INDEX]], 277
+; CHECK-NEXT:    [[TMP841:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP840]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP841]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE554]]
+; CHECK:       [[PRED_STORE_CONTINUE554]]:
+; CHECK-NEXT:    [[TMP842:%.*]] = extractelement <64 x i1> [[TMP4]], i32 22
+; CHECK-NEXT:    br i1 [[TMP842]], label %[[PRED_STORE_IF555:.*]], label %[[PRED_STORE_CONTINUE556:.*]]
+; CHECK:       [[PRED_STORE_IF555]]:
+; CHECK-NEXT:    [[TMP843:%.*]] = add i64 [[INDEX]], 278
+; CHECK-NEXT:    [[TMP844:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP843]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP844]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE556]]
+; CHECK:       [[PRED_STORE_CONTINUE556]]:
+; CHECK-NEXT:    [[TMP845:%.*]] = extractelement <64 x i1> [[TMP4]], i32 23
+; CHECK-NEXT:    br i1 [[TMP845]], label %[[PRED_STORE_IF557:.*]], label %[[PRED_STORE_CONTINUE558:.*]]
+; CHECK:       [[PRED_STORE_IF557]]:
+; CHECK-NEXT:    [[TMP846:%.*]] = add i64 [[INDEX]], 279
+; CHECK-NEXT:    [[TMP847:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP846]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP847]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE558]]
+; CHECK:       [[PRED_STORE_CONTINUE558]]:
+; CHECK-NEXT:    [[TMP848:%.*]] = extractelement <64 x i1> [[TMP4]], i32 24
+; CHECK-NEXT:    br i1 [[TMP848]], label %[[PRED_STORE_IF559:.*]], label %[[PRED_STORE_CONTINUE560:.*]]
+; CHECK:       [[PRED_STORE_IF559]]:
+; CHECK-NEXT:    [[TMP849:%.*]] = add i64 [[INDEX]], 280
+; CHECK-NEXT:    [[TMP850:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP849]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP850]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE560]]
+; CHECK:       [[PRED_STORE_CONTINUE560]]:
+; CHECK-NEXT:    [[TMP851:%.*]] = extractelement <64 x i1> [[TMP4]], i32 25
+; CHECK-NEXT:    br i1 [[TMP851]], label %[[PRED_STORE_IF561:.*]], label %[[PRED_STORE_CONTINUE562:.*]]
+; CHECK:       [[PRED_STORE_IF561]]:
+; CHECK-NEXT:    [[TMP852:%.*]] = add i64 [[INDEX]], 281
+; CHECK-NEXT:    [[TMP853:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP852]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP853]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE562]]
+; CHECK:       [[PRED_STORE_CONTINUE562]]:
+; CHECK-NEXT:    [[TMP854:%.*]] = extractelement <64 x i1> [[TMP4]], i32 26
+; CHECK-NEXT:    br i1 [[TMP854]], label %[[PRED_STORE_IF563:.*]], label %[[PRED_STORE_CONTINUE564:.*]]
+; CHECK:       [[PRED_STORE_IF563]]:
+; CHECK-NEXT:    [[TMP855:%.*]] = add i64 [[INDEX]], 282
+; CHECK-NEXT:    [[TMP856:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP855]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP856]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE564]]
+; CHECK:       [[PRED_STORE_CONTINUE564]]:
+; CHECK-NEXT:    [[TMP857:%.*]] = extractelement <64 x i1> [[TMP4]], i32 27
+; CHECK-NEXT:    br i1 [[TMP857]], label %[[PRED_STORE_IF565:.*]], label %[[PRED_STORE_CONTINUE566:.*]]
+; CHECK:       [[PRED_STORE_IF565]]:
+; CHECK-NEXT:    [[TMP858:%.*]] = add i64 [[INDEX]], 283
+; CHECK-NEXT:    [[TMP859:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP858]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP859]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE566]]
+; CHECK:       [[PRED_STORE_CONTINUE566]]:
+; CHECK-NEXT:    [[TMP860:%.*]] = extractelement <64 x i1> [[TMP4]], i32 28
+; CHECK-NEXT:    br i1 [[TMP860]], label %[[PRED_STORE_IF567:.*]], label %[[PRED_STORE_CONTINUE568:.*]]
+; CHECK:       [[PRED_STORE_IF567]]:
+; CHECK-NEXT:    [[TMP861:%.*]] = add i64 [[INDEX]], 284
+; CHECK-NEXT:    [[TMP862:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP861]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP862]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE568]]
+; CHECK:       [[PRED_STORE_CONTINUE568]]:
+; CHECK-NEXT:    [[TMP863:%.*]] = extractelement <64 x i1> [[TMP4]], i32 29
+; CHECK-NEXT:    br i1 [[TMP863]], label %[[PRED_STORE_IF569:.*]], label %[[PRED_STORE_CONTINUE570:.*]]
+; CHECK:       [[PRED_STORE_IF569]]:
+; CHECK-NEXT:    [[TMP864:%.*]] = add i64 [[INDEX]], 285
+; CHECK-NEXT:    [[TMP865:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP864]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP865]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE570]]
+; CHECK:       [[PRED_STORE_CONTINUE570]]:
+; CHECK-NEXT:    [[TMP866:%.*]] = extractelement <64 x i1> [[TMP4]], i32 30
+; CHECK-NEXT:    br i1 [[TMP866]], label %[[PRED_STORE_IF571:.*]], label %[[PRED_STORE_CONTINUE572:.*]]
+; CHECK:       [[PRED_STORE_IF571]]:
+; CHECK-NEXT:    [[TMP867:%.*]] = add i64 [[INDEX]], 286
+; CHECK-NEXT:    [[TMP868:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP867]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP868]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE572]]
+; CHECK:       [[PRED_STORE_CONTINUE572]]:
+; CHECK-NEXT:    [[TMP869:%.*]] = extractelement <64 x i1> [[TMP4]], i32 31
+; CHECK-NEXT:    br i1 [[TMP869]], label %[[PRED_STORE_IF573:.*]], label %[[PRED_STORE_CONTINUE574:.*]]
+; CHECK:       [[PRED_STORE_IF573]]:
+; CHECK-NEXT:    [[TMP870:%.*]] = add i64 [[INDEX]], 287
+; CHECK-NEXT:    [[TMP871:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP870]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP871]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE574]]
+; CHECK:       [[PRED_STORE_CONTINUE574]]:
+; CHECK-NEXT:    [[TMP872:%.*]] = extractelement <64 x i1> [[TMP4]], i32 32
+; CHECK-NEXT:    br i1 [[TMP872]], label %[[PRED_STORE_IF575:.*]], label %[[PRED_STORE_CONTINUE576:.*]]
+; CHECK:       [[PRED_STORE_IF575]]:
+; CHECK-NEXT:    [[TMP873:%.*]] = add i64 [[INDEX]], 288
+; CHECK-NEXT:    [[TMP874:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP873]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP874]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE576]]
+; CHECK:       [[PRED_STORE_CONTINUE576]]:
+; CHECK-NEXT:    [[TMP875:%.*]] = extractelement <64 x i1> [[TMP4]], i32 33
+; CHECK-NEXT:    br i1 [[TMP875]], label %[[PRED_STORE_IF577:.*]], label %[[PRED_STORE_CONTINUE578:.*]]
+; CHECK:       [[PRED_STORE_IF577]]:
+; CHECK-NEXT:    [[TMP876:%.*]] = add i64 [[INDEX]], 289
+; CHECK-NEXT:    [[TMP877:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP876]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP877]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE578]]
+; CHECK:       [[PRED_STORE_CONTINUE578]]:
+; CHECK-NEXT:    [[TMP878:%.*]] = extractelement <64 x i1> [[TMP4]], i32 34
+; CHECK-NEXT:    br i1 [[TMP878]], label %[[PRED_STORE_IF579:.*]], label %[[PRED_STORE_CONTINUE580:.*]]
+; CHECK:       [[PRED_STORE_IF579]]:
+; CHECK-NEXT:    [[TMP879:%.*]] = add i64 [[INDEX]], 290
+; CHECK-NEXT:    [[TMP880:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP879]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP880]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE580]]
+; CHECK:       [[PRED_STORE_CONTINUE580]]:
+; CHECK-NEXT:    [[TMP881:%.*]] = extractelement <64 x i1> [[TMP4]], i32 35
+; CHECK-NEXT:    br i1 [[TMP881]], label %[[PRED_STORE_IF581:.*]], label %[[PRED_STORE_CONTINUE582:.*]]
+; CHECK:       [[PRED_STORE_IF581]]:
+; CHECK-NEXT:    [[TMP882:%.*]] = add i64 [[INDEX]], 291
+; CHECK-NEXT:    [[TMP883:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP882]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP883]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE582]]
+; CHECK:       [[PRED_STORE_CONTINUE582]]:
+; CHECK-NEXT:    [[TMP884:%.*]] = extractelement <64 x i1> [[TMP4]], i32 36
+; CHECK-NEXT:    br i1 [[TMP884]], label %[[PRED_STORE_IF583:.*]], label %[[PRED_STORE_CONTINUE584:.*]]
+; CHECK:       [[PRED_STORE_IF583]]:
+; CHECK-NEXT:    [[TMP885:%.*]] = add i64 [[INDEX]], 292
+; CHECK-NEXT:    [[TMP886:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP885]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP886]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE584]]
+; CHECK:       [[PRED_STORE_CONTINUE584]]:
+; CHECK-NEXT:    [[TMP887:%.*]] = extractelement <64 x i1> [[TMP4]], i32 37
+; CHECK-NEXT:    br i1 [[TMP887]], label %[[PRED_STORE_IF585:.*]], label %[[PRED_STORE_CONTINUE586:.*]]
+; CHECK:       [[PRED_STORE_IF585]]:
+; CHECK-NEXT:    [[TMP888:%.*]] = add i64 [[INDEX]], 293
+; CHECK-NEXT:    [[TMP889:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP888]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP889]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE586]]
+; CHECK:       [[PRED_STORE_CONTINUE586]]:
+; CHECK-NEXT:    [[TMP890:%.*]] = extractelement <64 x i1> [[TMP4]], i32 38
+; CHECK-NEXT:    br i1 [[TMP890]], label %[[PRED_STORE_IF587:.*]], label %[[PRED_STORE_CONTINUE588:.*]]
+; CHECK:       [[PRED_STORE_IF587]]:
+; CHECK-NEXT:    [[TMP891:%.*]] = add i64 [[INDEX]], 294
+; CHECK-NEXT:    [[TMP892:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP891]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP892]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE588]]
+; CHECK:       [[PRED_STORE_CONTINUE588]]:
+; CHECK-NEXT:    [[TMP893:%.*]] = extractelement <64 x i1> [[TMP4]], i32 39
+; CHECK-NEXT:    br i1 [[TMP893]], label %[[PRED_STORE_IF589:.*]], label %[[PRED_STORE_CONTINUE590:.*]]
+; CHECK:       [[PRED_STORE_IF589]]:
+; CHECK-NEXT:    [[TMP894:%.*]] = add i64 [[INDEX]], 295
+; CHECK-NEXT:    [[TMP895:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP894]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP895]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE590]]
+; CHECK:       [[PRED_STORE_CONTINUE590]]:
+; CHECK-NEXT:    [[TMP896:%.*]] = extractelement <64 x i1> [[TMP4]], i32 40
+; CHECK-NEXT:    br i1 [[TMP896]], label %[[PRED_STORE_IF591:.*]], label %[[PRED_STORE_CONTINUE592:.*]]
+; CHECK:       [[PRED_STORE_IF591]]:
+; CHECK-NEXT:    [[TMP897:%.*]] = add i64 [[INDEX]], 296
+; CHECK-NEXT:    [[TMP898:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP897]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP898]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE592]]
+; CHECK:       [[PRED_STORE_CONTINUE592]]:
+; CHECK-NEXT:    [[TMP899:%.*]] = extractelement <64 x i1> [[TMP4]], i32 41
+; CHECK-NEXT:    br i1 [[TMP899]], label %[[PRED_STORE_IF593:.*]], label %[[PRED_STORE_CONTINUE594:.*]]
+; CHECK:       [[PRED_STORE_IF593]]:
+; CHECK-NEXT:    [[TMP900:%.*]] = add i64 [[INDEX]], 297
+; CHECK-NEXT:    [[TMP901:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP900]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP901]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE594]]
+; CHECK:       [[PRED_STORE_CONTINUE594]]:
+; CHECK-NEXT:    [[TMP902:%.*]] = extractelement <64 x i1> [[TMP4]], i32 42
+; CHECK-NEXT:    br i1 [[TMP902]], label %[[PRED_STORE_IF595:.*]], label %[[PRED_STORE_CONTINUE596:.*]]
+; CHECK:       [[PRED_STORE_IF595]]:
+; CHECK-NEXT:    [[TMP903:%.*]] = add i64 [[INDEX]], 298
+; CHECK-NEXT:    [[TMP904:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP903]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP904]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE596]]
+; CHECK:       [[PRED_STORE_CONTINUE596]]:
+; CHECK-NEXT:    [[TMP905:%.*]] = extractelement <64 x i1> [[TMP4]], i32 43
+; CHECK-NEXT:    br i1 [[TMP905]], label %[[PRED_STORE_IF597:.*]], label %[[PRED_STORE_CONTINUE598:.*]]
+; CHECK:       [[PRED_STORE_IF597]]:
+; CHECK-NEXT:    [[TMP906:%.*]] = add i64 [[INDEX]], 299
+; CHECK-NEXT:    [[TMP907:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP906]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP907]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE598]]
+; CHECK:       [[PRED_STORE_CONTINUE598]]:
+; CHECK-NEXT:    [[TMP908:%.*]] = extractelement <64 x i1> [[TMP4]], i32 44
+; CHECK-NEXT:    br i1 [[TMP908]], label %[[PRED_STORE_IF599:.*]], label %[[PRED_STORE_CONTINUE600:.*]]
+; CHECK:       [[PRED_STORE_IF599]]:
+; CHECK-NEXT:    [[TMP909:%.*]] = add i64 [[INDEX]], 300
+; CHECK-NEXT:    [[TMP910:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP909]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP910]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE600]]
+; CHECK:       [[PRED_STORE_CONTINUE600]]:
+; CHECK-NEXT:    [[TMP911:%.*]] = extractelement <64 x i1> [[TMP4]], i32 45
+; CHECK-NEXT:    br i1 [[TMP911]], label %[[PRED_STORE_IF601:.*]], label %[[PRED_STORE_CONTINUE602:.*]]
+; CHECK:       [[PRED_STORE_IF601]]:
+; CHECK-NEXT:    [[TMP912:%.*]] = add i64 [[INDEX]], 301
+; CHECK-NEXT:    [[TMP913:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP912]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP913]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE602]]
+; CHECK:       [[PRED_STORE_CONTINUE602]]:
+; CHECK-NEXT:    [[TMP914:%.*]] = extractelement <64 x i1> [[TMP4]], i32 46
+; CHECK-NEXT:    br i1 [[TMP914]], label %[[PRED_STORE_IF603:.*]], label %[[PRED_STORE_CONTINUE604:.*]]
+; CHECK:       [[PRED_STORE_IF603]]:
+; CHECK-NEXT:    [[TMP915:%.*]] = add i64 [[INDEX]], 302
+; CHECK-NEXT:    [[TMP916:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP915]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP916]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE604]]
+; CHECK:       [[PRED_STORE_CONTINUE604]]:
+; CHECK-NEXT:    [[TMP917:%.*]] = extractelement <64 x i1> [[TMP4]], i32 47
+; CHECK-NEXT:    br i1 [[TMP917]], label %[[PRED_STORE_IF605:.*]], label %[[PRED_STORE_CONTINUE606:.*]]
+; CHECK:       [[PRED_STORE_IF605]]:
+; CHECK-NEXT:    [[TMP918:%.*]] = add i64 [[INDEX]], 303
+; CHECK-NEXT:    [[TMP919:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP918]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP919]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE606]]
+; CHECK:       [[PRED_STORE_CONTINUE606]]:
+; CHECK-NEXT:    [[TMP920:%.*]] = extractelement <64 x i1> [[TMP4]], i32 48
+; CHECK-NEXT:    br i1 [[TMP920]], label %[[PRED_STORE_IF607:.*]], label %[[PRED_STORE_CONTINUE608:.*]]
+; CHECK:       [[PRED_STORE_IF607]]:
+; CHECK-NEXT:    [[TMP921:%.*]] = add i64 [[INDEX]], 304
+; CHECK-NEXT:    [[TMP922:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP921]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP922]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE608]]
+; CHECK:       [[PRED_STORE_CONTINUE608]]:
+; CHECK-NEXT:    [[TMP923:%.*]] = extractelement <64 x i1> [[TMP4]], i32 49
+; CHECK-NEXT:    br i1 [[TMP923]], label %[[PRED_STORE_IF609:.*]], label %[[PRED_STORE_CONTINUE610:.*]]
+; CHECK:       [[PRED_STORE_IF609]]:
+; CHECK-NEXT:    [[TMP924:%.*]] = add i64 [[INDEX]], 305
+; CHECK-NEXT:    [[TMP925:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP924]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP925]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE610]]
+; CHECK:       [[PRED_STORE_CONTINUE610]]:
+; CHECK-NEXT:    [[TMP926:%.*]] = extractelement <64 x i1> [[TMP4]], i32 50
+; CHECK-NEXT:    br i1 [[TMP926]], label %[[PRED_STORE_IF611:.*]], label %[[PRED_STORE_CONTINUE612:.*]]
+; CHECK:       [[PRED_STORE_IF611]]:
+; CHECK-NEXT:    [[TMP927:%.*]] = add i64 [[INDEX]], 306
+; CHECK-NEXT:    [[TMP928:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP927]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP928]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE612]]
+; CHECK:       [[PRED_STORE_CONTINUE612]]:
+; CHECK-NEXT:    [[TMP929:%.*]] = extractelement <64 x i1> [[TMP4]], i32 51
+; CHECK-NEXT:    br i1 [[TMP929]], label %[[PRED_STORE_IF613:.*]], label %[[PRED_STORE_CONTINUE614:.*]]
+; CHECK:       [[PRED_STORE_IF613]]:
+; CHECK-NEXT:    [[TMP930:%.*]] = add i64 [[INDEX]], 307
+; CHECK-NEXT:    [[TMP931:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP930]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP931]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE614]]
+; CHECK:       [[PRED_STORE_CONTINUE614]]:
+; CHECK-NEXT:    [[TMP932:%.*]] = extractelement <64 x i1> [[TMP4]], i32 52
+; CHECK-NEXT:    br i1 [[TMP932]], label %[[PRED_STORE_IF615:.*]], label %[[PRED_STORE_CONTINUE616:.*]]
+; CHECK:       [[PRED_STORE_IF615]]:
+; CHECK-NEXT:    [[TMP933:%.*]] = add i64 [[INDEX]], 308
+; CHECK-NEXT:    [[TMP934:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP933]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP934]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE616]]
+; CHECK:       [[PRED_STORE_CONTINUE616]]:
+; CHECK-NEXT:    [[TMP935:%.*]] = extractelement <64 x i1> [[TMP4]], i32 53
+; CHECK-NEXT:    br i1 [[TMP935]], label %[[PRED_STORE_IF617:.*]], label %[[PRED_STORE_CONTINUE618:.*]]
+; CHECK:       [[PRED_STORE_IF617]]:
+; CHECK-NEXT:    [[TMP936:%.*]] = add i64 [[INDEX]], 309
+; CHECK-NEXT:    [[TMP937:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP936]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP937]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE618]]
+; CHECK:       [[PRED_STORE_CONTINUE618]]:
+; CHECK-NEXT:    [[TMP938:%.*]] = extractelement <64 x i1> [[TMP4]], i32 54
+; CHECK-NEXT:    br i1 [[TMP938]], label %[[PRED_STORE_IF619:.*]], label %[[PRED_STORE_CONTINUE620:.*]]
+; CHECK:       [[PRED_STORE_IF619]]:
+; CHECK-NEXT:    [[TMP939:%.*]] = add i64 [[INDEX]], 310
+; CHECK-NEXT:    [[TMP940:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP939]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP940]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE620]]
+; CHECK:       [[PRED_STORE_CONTINUE620]]:
+; CHECK-NEXT:    [[TMP941:%.*]] = extractelement <64 x i1> [[TMP4]], i32 55
+; CHECK-NEXT:    br i1 [[TMP941]], label %[[PRED_STORE_IF621:.*]], label %[[PRED_STORE_CONTINUE622:.*]]
+; CHECK:       [[PRED_STORE_IF621]]:
+; CHECK-NEXT:    [[TMP942:%.*]] = add i64 [[INDEX]], 311
+; CHECK-NEXT:    [[TMP943:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP942]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP943]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE622]]
+; CHECK:       [[PRED_STORE_CONTINUE622]]:
+; CHECK-NEXT:    [[TMP944:%.*]] = extractelement <64 x i1> [[TMP4]], i32 56
+; CHECK-NEXT:    br i1 [[TMP944]], label %[[PRED_STORE_IF623:.*]], label %[[PRED_STORE_CONTINUE624:.*]]
+; CHECK:       [[PRED_STORE_IF623]]:
+; CHECK-NEXT:    [[TMP945:%.*]] = add i64 [[INDEX]], 312
+; CHECK-NEXT:    [[TMP946:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP945]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP946]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE624]]
+; CHECK:       [[PRED_STORE_CONTINUE624]]:
+; CHECK-NEXT:    [[TMP947:%.*]] = extractelement <64 x i1> [[TMP4]], i32 57
+; CHECK-NEXT:    br i1 [[TMP947]], label %[[PRED_STORE_IF625:.*]], label %[[PRED_STORE_CONTINUE626:.*]]
+; CHECK:       [[PRED_STORE_IF625]]:
+; CHECK-NEXT:    [[TMP948:%.*]] = add i64 [[INDEX]], 313
+; CHECK-NEXT:    [[TMP949:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP948]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP949]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE626]]
+; CHECK:       [[PRED_STORE_CONTINUE626]]:
+; CHECK-NEXT:    [[TMP950:%.*]] = extractelement <64 x i1> [[TMP4]], i32 58
+; CHECK-NEXT:    br i1 [[TMP950]], label %[[PRED_STORE_IF627:.*]], label %[[PRED_STORE_CONTINUE628:.*]]
+; CHECK:       [[PRED_STORE_IF627]]:
+; CHECK-NEXT:    [[TMP951:%.*]] = add i64 [[INDEX]], 314
+; CHECK-NEXT:    [[TMP952:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP951]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP952]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE628]]
+; CHECK:       [[PRED_STORE_CONTINUE628]]:
+; CHECK-NEXT:    [[TMP953:%.*]] = extractelement <64 x i1> [[TMP4]], i32 59
+; CHECK-NEXT:    br i1 [[TMP953]], label %[[PRED_STORE_IF629:.*]], label %[[PRED_STORE_CONTINUE630:.*]]
+; CHECK:       [[PRED_STORE_IF629]]:
+; CHECK-NEXT:    [[TMP954:%.*]] = add i64 [[INDEX]], 315
+; CHECK-NEXT:    [[TMP955:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP954]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP955]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE630]]
+; CHECK:       [[PRED_STORE_CONTINUE630]]:
+; CHECK-NEXT:    [[TMP956:%.*]] = extractelement <64 x i1> [[TMP4]], i32 60
+; CHECK-NEXT:    br i1 [[TMP956]], label %[[PRED_STORE_IF631:.*]], label %[[PRED_STORE_CONTINUE632:.*]]
+; CHECK:       [[PRED_STORE_IF631]]:
+; CHECK-NEXT:    [[TMP957:%.*]] = add i64 [[INDEX]], 316
+; CHECK-NEXT:    [[TMP958:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP957]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP958]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE632]]
+; CHECK:       [[PRED_STORE_CONTINUE632]]:
+; CHECK-NEXT:    [[TMP959:%.*]] = extractelement <64 x i1> [[TMP4]], i32 61
+; CHECK-NEXT:    br i1 [[TMP959]], label %[[PRED_STORE_IF633:.*]], label %[[PRED_STORE_CONTINUE634:.*]]
+; CHECK:       [[PRED_STORE_IF633]]:
+; CHECK-NEXT:    [[TMP960:%.*]] = add i64 [[INDEX]], 317
+; CHECK-NEXT:    [[TMP961:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP960]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP961]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE634]]
+; CHECK:       [[PRED_STORE_CONTINUE634]]:
+; CHECK-NEXT:    [[TMP962:%.*]] = extractelement <64 x i1> [[TMP4]], i32 62
+; CHECK-NEXT:    br i1 [[TMP962]], label %[[PRED_STORE_IF635:.*]], label %[[PRED_STORE_CONTINUE636:.*]]
+; CHECK:       [[PRED_STORE_IF635]]:
+; CHECK-NEXT:    [[TMP963:%.*]] = add i64 [[INDEX]], 318
+; CHECK-NEXT:    [[TMP964:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP963]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP964]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE636]]
+; CHECK:       [[PRED_STORE_CONTINUE636]]:
+; CHECK-NEXT:    [[TMP965:%.*]] = extractelement <64 x i1> [[TMP4]], i32 63
+; CHECK-NEXT:    br i1 [[TMP965]], label %[[PRED_STORE_IF637:.*]], label %[[PRED_STORE_CONTINUE638:.*]]
+; CHECK:       [[PRED_STORE_IF637]]:
+; CHECK-NEXT:    [[TMP966:%.*]] = add i64 [[INDEX]], 319
+; CHECK-NEXT:    [[TMP967:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP966]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP967]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE638]]
+; CHECK:       [[PRED_STORE_CONTINUE638]]:
+; CHECK-NEXT:    [[TMP968:%.*]] = extractelement <64 x i1> [[TMP5]], i32 0
+; CHECK-NEXT:    br i1 [[TMP968]], label %[[PRED_STORE_IF639:.*]], label %[[PRED_STORE_CONTINUE640:.*]]
+; CHECK:       [[PRED_STORE_IF639]]:
+; CHECK-NEXT:    [[TMP969:%.*]] = add i64 [[INDEX]], 320
+; CHECK-NEXT:    [[TMP970:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP969]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP970]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE640]]
+; CHECK:       [[PRED_STORE_CONTINUE640]]:
+; CHECK-NEXT:    [[TMP971:%.*]] = extractelement <64 x i1> [[TMP5]], i32 1
+; CHECK-NEXT:    br i1 [[TMP971]], label %[[PRED_STORE_IF641:.*]], label %[[PRED_STORE_CONTINUE642:.*]]
+; CHECK:       [[PRED_STORE_IF641]]:
+; CHECK-NEXT:    [[TMP972:%.*]] = add i64 [[INDEX]], 321
+; CHECK-NEXT:    [[TMP973:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP972]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP973]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE642]]
+; CHECK:       [[PRED_STORE_CONTINUE642]]:
+; CHECK-NEXT:    [[TMP974:%.*]] = extractelement <64 x i1> [[TMP5]], i32 2
+; CHECK-NEXT:    br i1 [[TMP974]], label %[[PRED_STORE_IF643:.*]], label %[[PRED_STORE_CONTINUE644:.*]]
+; CHECK:       [[PRED_STORE_IF643]]:
+; CHECK-NEXT:    [[TMP975:%.*]] = add i64 [[INDEX]], 322
+; CHECK-NEXT:    [[TMP976:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP975]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP976]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE644]]
+; CHECK:       [[PRED_STORE_CONTINUE644]]:
+; CHECK-NEXT:    [[TMP977:%.*]] = extractelement <64 x i1> [[TMP5]], i32 3
+; CHECK-NEXT:    br i1 [[TMP977]], label %[[PRED_STORE_IF645:.*]], label %[[PRED_STORE_CONTINUE646:.*]]
+; CHECK:       [[PRED_STORE_IF645]]:
+; CHECK-NEXT:    [[TMP978:%.*]] = add i64 [[INDEX]], 323
+; CHECK-NEXT:    [[TMP979:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP978]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP979]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE646]]
+; CHECK:       [[PRED_STORE_CONTINUE646]]:
+; CHECK-NEXT:    [[TMP980:%.*]] = extractelement <64 x i1> [[TMP5]], i32 4
+; CHECK-NEXT:    br i1 [[TMP980]], label %[[PRED_STORE_IF647:.*]], label %[[PRED_STORE_CONTINUE648:.*]]
+; CHECK:       [[PRED_STORE_IF647]]:
+; CHECK-NEXT:    [[TMP981:%.*]] = add i64 [[INDEX]], 324
+; CHECK-NEXT:    [[TMP982:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP981]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP982]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE648]]
+; CHECK:       [[PRED_STORE_CONTINUE648]]:
+; CHECK-NEXT:    [[TMP983:%.*]] = extractelement <64 x i1> [[TMP5]], i32 5
+; CHECK-NEXT:    br i1 [[TMP983]], label %[[PRED_STORE_IF649:.*]], label %[[PRED_STORE_CONTINUE650:.*]]
+; CHECK:       [[PRED_STORE_IF649]]:
+; CHECK-NEXT:    [[TMP984:%.*]] = add i64 [[INDEX]], 325
+; CHECK-NEXT:    [[TMP985:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP984]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP985]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE650]]
+; CHECK:       [[PRED_STORE_CONTINUE650]]:
+; CHECK-NEXT:    [[TMP986:%.*]] = extractelement <64 x i1> [[TMP5]], i32 6
+; CHECK-NEXT:    br i1 [[TMP986]], label %[[PRED_STORE_IF651:.*]], label %[[PRED_STORE_CONTINUE652:.*]]
+; CHECK:       [[PRED_STORE_IF651]]:
+; CHECK-NEXT:    [[TMP987:%.*]] = add i64 [[INDEX]], 326
+; CHECK-NEXT:    [[TMP988:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP987]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP988]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE652]]
+; CHECK:       [[PRED_STORE_CONTINUE652]]:
+; CHECK-NEXT:    [[TMP989:%.*]] = extractelement <64 x i1> [[TMP5]], i32 7
+; CHECK-NEXT:    br i1 [[TMP989]], label %[[PRED_STORE_IF653:.*]], label %[[PRED_STORE_CONTINUE654:.*]]
+; CHECK:       [[PRED_STORE_IF653]]:
+; CHECK-NEXT:    [[TMP990:%.*]] = add i64 [[INDEX]], 327
+; CHECK-NEXT:    [[TMP991:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP990]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP991]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE654]]
+; CHECK:       [[PRED_STORE_CONTINUE654]]:
+; CHECK-NEXT:    [[TMP992:%.*]] = extractelement <64 x i1> [[TMP5]], i32 8
+; CHECK-NEXT:    br i1 [[TMP992]], label %[[PRED_STORE_IF655:.*]], label %[[PRED_STORE_CONTINUE656:.*]]
+; CHECK:       [[PRED_STORE_IF655]]:
+; CHECK-NEXT:    [[TMP993:%.*]] = add i64 [[INDEX]], 328
+; CHECK-NEXT:    [[TMP994:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP993]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP994]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE656]]
+; CHECK:       [[PRED_STORE_CONTINUE656]]:
+; CHECK-NEXT:    [[TMP995:%.*]] = extractelement <64 x i1> [[TMP5]], i32 9
+; CHECK-NEXT:    br i1 [[TMP995]], label %[[PRED_STORE_IF657:.*]], label %[[PRED_STORE_CONTINUE658:.*]]
+; CHECK:       [[PRED_STORE_IF657]]:
+; CHECK-NEXT:    [[TMP996:%.*]] = add i64 [[INDEX]], 329
+; CHECK-NEXT:    [[TMP997:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP996]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP997]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE658]]
+; CHECK:       [[PRED_STORE_CONTINUE658]]:
+; CHECK-NEXT:    [[TMP998:%.*]] = extractelement <64 x i1> [[TMP5]], i32 10
+; CHECK-NEXT:    br i1 [[TMP998]], label %[[PRED_STORE_IF659:.*]], label %[[PRED_STORE_CONTINUE660:.*]]
+; CHECK:       [[PRED_STORE_IF659]]:
+; CHECK-NEXT:    [[TMP999:%.*]] = add i64 [[INDEX]], 330
+; CHECK-NEXT:    [[TMP1000:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP999]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1000]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE660]]
+; CHECK:       [[PRED_STORE_CONTINUE660]]:
+; CHECK-NEXT:    [[TMP1001:%.*]] = extractelement <64 x i1> [[TMP5]], i32 11
+; CHECK-NEXT:    br i1 [[TMP1001]], label %[[PRED_STORE_IF661:.*]], label %[[PRED_STORE_CONTINUE662:.*]]
+; CHECK:       [[PRED_STORE_IF661]]:
+; CHECK-NEXT:    [[TMP1002:%.*]] = add i64 [[INDEX]], 331
+; CHECK-NEXT:    [[TMP1003:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1002]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1003]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE662]]
+; CHECK:       [[PRED_STORE_CONTINUE662]]:
+; CHECK-NEXT:    [[TMP1004:%.*]] = extractelement <64 x i1> [[TMP5]], i32 12
+; CHECK-NEXT:    br i1 [[TMP1004]], label %[[PRED_STORE_IF663:.*]], label %[[PRED_STORE_CONTINUE664:.*]]
+; CHECK:       [[PRED_STORE_IF663]]:
+; CHECK-NEXT:    [[TMP1005:%.*]] = add i64 [[INDEX]], 332
+; CHECK-NEXT:    [[TMP1006:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1005]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1006]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE664]]
+; CHECK:       [[PRED_STORE_CONTINUE664]]:
+; CHECK-NEXT:    [[TMP1007:%.*]] = extractelement <64 x i1> [[TMP5]], i32 13
+; CHECK-NEXT:    br i1 [[TMP1007]], label %[[PRED_STORE_IF665:.*]], label %[[PRED_STORE_CONTINUE666:.*]]
+; CHECK:       [[PRED_STORE_IF665]]:
+; CHECK-NEXT:    [[TMP1008:%.*]] = add i64 [[INDEX]], 333
+; CHECK-NEXT:    [[TMP1009:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1008]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1009]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE666]]
+; CHECK:       [[PRED_STORE_CONTINUE666]]:
+; CHECK-NEXT:    [[TMP1010:%.*]] = extractelement <64 x i1> [[TMP5]], i32 14
+; CHECK-NEXT:    br i1 [[TMP1010]], label %[[PRED_STORE_IF667:.*]], label %[[PRED_STORE_CONTINUE668:.*]]
+; CHECK:       [[PRED_STORE_IF667]]:
+; CHECK-NEXT:    [[TMP1011:%.*]] = add i64 [[INDEX]], 334
+; CHECK-NEXT:    [[TMP1012:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1011]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1012]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE668]]
+; CHECK:       [[PRED_STORE_CONTINUE668]]:
+; CHECK-NEXT:    [[TMP1013:%.*]] = extractelement <64 x i1> [[TMP5]], i32 15
+; CHECK-NEXT:    br i1 [[TMP1013]], label %[[PRED_STORE_IF669:.*]], label %[[PRED_STORE_CONTINUE670:.*]]
+; CHECK:       [[PRED_STORE_IF669]]:
+; CHECK-NEXT:    [[TMP1014:%.*]] = add i64 [[INDEX]], 335
+; CHECK-NEXT:    [[TMP1015:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1014]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1015]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE670]]
+; CHECK:       [[PRED_STORE_CONTINUE670]]:
+; CHECK-NEXT:    [[TMP1016:%.*]] = extractelement <64 x i1> [[TMP5]], i32 16
+; CHECK-NEXT:    br i1 [[TMP1016]], label %[[PRED_STORE_IF671:.*]], label %[[PRED_STORE_CONTINUE672:.*]]
+; CHECK:       [[PRED_STORE_IF671]]:
+; CHECK-NEXT:    [[TMP1017:%.*]] = add i64 [[INDEX]], 336
+; CHECK-NEXT:    [[TMP1018:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1017]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1018]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE672]]
+; CHECK:       [[PRED_STORE_CONTINUE672]]:
+; CHECK-NEXT:    [[TMP1019:%.*]] = extractelement <64 x i1> [[TMP5]], i32 17
+; CHECK-NEXT:    br i1 [[TMP1019]], label %[[PRED_STORE_IF673:.*]], label %[[PRED_STORE_CONTINUE674:.*]]
+; CHECK:       [[PRED_STORE_IF673]]:
+; CHECK-NEXT:    [[TMP1020:%.*]] = add i64 [[INDEX]], 337
+; CHECK-NEXT:    [[TMP1021:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1020]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1021]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE674]]
+; CHECK:       [[PRED_STORE_CONTINUE674]]:
+; CHECK-NEXT:    [[TMP1022:%.*]] = extractelement <64 x i1> [[TMP5]], i32 18
+; CHECK-NEXT:    br i1 [[TMP1022]], label %[[PRED_STORE_IF675:.*]], label %[[PRED_STORE_CONTINUE676:.*]]
+; CHECK:       [[PRED_STORE_IF675]]:
+; CHECK-NEXT:    [[TMP1023:%.*]] = add i64 [[INDEX]], 338
+; CHECK-NEXT:    [[TMP1024:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1023]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1024]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE676]]
+; CHECK:       [[PRED_STORE_CONTINUE676]]:
+; CHECK-NEXT:    [[TMP1025:%.*]] = extractelement <64 x i1> [[TMP5]], i32 19
+; CHECK-NEXT:    br i1 [[TMP1025]], label %[[PRED_STORE_IF677:.*]], label %[[PRED_STORE_CONTINUE678:.*]]
+; CHECK:       [[PRED_STORE_IF677]]:
+; CHECK-NEXT:    [[TMP1026:%.*]] = add i64 [[INDEX]], 339
+; CHECK-NEXT:    [[TMP1027:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1026]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1027]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE678]]
+; CHECK:       [[PRED_STORE_CONTINUE678]]:
+; CHECK-NEXT:    [[TMP1028:%.*]] = extractelement <64 x i1> [[TMP5]], i32 20
+; CHECK-NEXT:    br i1 [[TMP1028]], label %[[PRED_STORE_IF679:.*]], label %[[PRED_STORE_CONTINUE680:.*]]
+; CHECK:       [[PRED_STORE_IF679]]:
+; CHECK-NEXT:    [[TMP1029:%.*]] = add i64 [[INDEX]], 340
+; CHECK-NEXT:    [[TMP1030:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1029]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1030]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE680]]
+; CHECK:       [[PRED_STORE_CONTINUE680]]:
+; CHECK-NEXT:    [[TMP1031:%.*]] = extractelement <64 x i1> [[TMP5]], i32 21
+; CHECK-NEXT:    br i1 [[TMP1031]], label %[[PRED_STORE_IF681:.*]], label %[[PRED_STORE_CONTINUE682:.*]]
+; CHECK:       [[PRED_STORE_IF681]]:
+; CHECK-NEXT:    [[TMP1032:%.*]] = add i64 [[INDEX]], 341
+; CHECK-NEXT:    [[TMP1033:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1032]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1033]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE682]]
+; CHECK:       [[PRED_STORE_CONTINUE682]]:
+; CHECK-NEXT:    [[TMP1034:%.*]] = extractelement <64 x i1> [[TMP5]], i32 22
+; CHECK-NEXT:    br i1 [[TMP1034]], label %[[PRED_STORE_IF683:.*]], label %[[PRED_STORE_CONTINUE684:.*]]
+; CHECK:       [[PRED_STORE_IF683]]:
+; CHECK-NEXT:    [[TMP1035:%.*]] = add i64 [[INDEX]], 342
+; CHECK-NEXT:    [[TMP1036:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1035]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1036]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE684]]
+; CHECK:       [[PRED_STORE_CONTINUE684]]:
+; CHECK-NEXT:    [[TMP1037:%.*]] = extractelement <64 x i1> [[TMP5]], i32 23
+; CHECK-NEXT:    br i1 [[TMP1037]], label %[[PRED_STORE_IF685:.*]], label %[[PRED_STORE_CONTINUE686:.*]]
+; CHECK:       [[PRED_STORE_IF685]]:
+; CHECK-NEXT:    [[TMP1038:%.*]] = add i64 [[INDEX]], 343
+; CHECK-NEXT:    [[TMP1039:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1038]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1039]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE686]]
+; CHECK:       [[PRED_STORE_CONTINUE686]]:
+; CHECK-NEXT:    [[TMP1040:%.*]] = extractelement <64 x i1> [[TMP5]], i32 24
+; CHECK-NEXT:    br i1 [[TMP1040]], label %[[PRED_STORE_IF687:.*]], label %[[PRED_STORE_CONTINUE688:.*]]
+; CHECK:       [[PRED_STORE_IF687]]:
+; CHECK-NEXT:    [[TMP1041:%.*]] = add i64 [[INDEX]], 344
+; CHECK-NEXT:    [[TMP1042:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1041]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1042]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE688]]
+; CHECK:       [[PRED_STORE_CONTINUE688]]:
+; CHECK-NEXT:    [[TMP1043:%.*]] = extractelement <64 x i1> [[TMP5]], i32 25
+; CHECK-NEXT:    br i1 [[TMP1043]], label %[[PRED_STORE_IF689:.*]], label %[[PRED_STORE_CONTINUE690:.*]]
+; CHECK:       [[PRED_STORE_IF689]]:
+; CHECK-NEXT:    [[TMP1044:%.*]] = add i64 [[INDEX]], 345
+; CHECK-NEXT:    [[TMP1045:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1044]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1045]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE690]]
+; CHECK:       [[PRED_STORE_CONTINUE690]]:
+; CHECK-NEXT:    [[TMP1046:%.*]] = extractelement <64 x i1> [[TMP5]], i32 26
+; CHECK-NEXT:    br i1 [[TMP1046]], label %[[PRED_STORE_IF691:.*]], label %[[PRED_STORE_CONTINUE692:.*]]
+; CHECK:       [[PRED_STORE_IF691]]:
+; CHECK-NEXT:    [[TMP1047:%.*]] = add i64 [[INDEX]], 346
+; CHECK-NEXT:    [[TMP1048:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1047]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1048]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE692]]
+; CHECK:       [[PRED_STORE_CONTINUE692]]:
+; CHECK-NEXT:    [[TMP1049:%.*]] = extractelement <64 x i1> [[TMP5]], i32 27
+; CHECK-NEXT:    br i1 [[TMP1049]], label %[[PRED_STORE_IF693:.*]], label %[[PRED_STORE_CONTINUE694:.*]]
+; CHECK:       [[PRED_STORE_IF693]]:
+; CHECK-NEXT:    [[TMP1050:%.*]] = add i64 [[INDEX]], 347
+; CHECK-NEXT:    [[TMP1051:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1050]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1051]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE694]]
+; CHECK:       [[PRED_STORE_CONTINUE694]]:
+; CHECK-NEXT:    [[TMP1052:%.*]] = extractelement <64 x i1> [[TMP5]], i32 28
+; CHECK-NEXT:    br i1 [[TMP1052]], label %[[PRED_STORE_IF695:.*]], label %[[PRED_STORE_CONTINUE696:.*]]
+; CHECK:       [[PRED_STORE_IF695]]:
+; CHECK-NEXT:    [[TMP1053:%.*]] = add i64 [[INDEX]], 348
+; CHECK-NEXT:    [[TMP1054:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1053]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1054]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE696]]
+; CHECK:       [[PRED_STORE_CONTINUE696]]:
+; CHECK-NEXT:    [[TMP1055:%.*]] = extractelement <64 x i1> [[TMP5]], i32 29
+; CHECK-NEXT:    br i1 [[TMP1055]], label %[[PRED_STORE_IF697:.*]], label %[[PRED_STORE_CONTINUE698:.*]]
+; CHECK:       [[PRED_STORE_IF697]]:
+; CHECK-NEXT:    [[TMP1056:%.*]] = add i64 [[INDEX]], 349
+; CHECK-NEXT:    [[TMP1057:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1056]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1057]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE698]]
+; CHECK:       [[PRED_STORE_CONTINUE698]]:
+; CHECK-NEXT:    [[TMP1058:%.*]] = extractelement <64 x i1> [[TMP5]], i32 30
+; CHECK-NEXT:    br i1 [[TMP1058]], label %[[PRED_STORE_IF699:.*]], label %[[PRED_STORE_CONTINUE700:.*]]
+; CHECK:       [[PRED_STORE_IF699]]:
+; CHECK-NEXT:    [[TMP1059:%.*]] = add i64 [[INDEX]], 350
+; CHECK-NEXT:    [[TMP1060:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1059]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1060]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE700]]
+; CHECK:       [[PRED_STORE_CONTINUE700]]:
+; CHECK-NEXT:    [[TMP1061:%.*]] = extractelement <64 x i1> [[TMP5]], i32 31
+; CHECK-NEXT:    br i1 [[TMP1061]], label %[[PRED_STORE_IF701:.*]], label %[[PRED_STORE_CONTINUE702:.*]]
+; CHECK:       [[PRED_STORE_IF701]]:
+; CHECK-NEXT:    [[TMP1062:%.*]] = add i64 [[INDEX]], 351
+; CHECK-NEXT:    [[TMP1063:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1062]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1063]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE702]]
+; CHECK:       [[PRED_STORE_CONTINUE702]]:
+; CHECK-NEXT:    [[TMP1064:%.*]] = extractelement <64 x i1> [[TMP5]], i32 32
+; CHECK-NEXT:    br i1 [[TMP1064]], label %[[PRED_STORE_IF703:.*]], label %[[PRED_STORE_CONTINUE704:.*]]
+; CHECK:       [[PRED_STORE_IF703]]:
+; CHECK-NEXT:    [[TMP1065:%.*]] = add i64 [[INDEX]], 352
+; CHECK-NEXT:    [[TMP1066:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1065]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1066]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE704]]
+; CHECK:       [[PRED_STORE_CONTINUE704]]:
+; CHECK-NEXT:    [[TMP1067:%.*]] = extractelement <64 x i1> [[TMP5]], i32 33
+; CHECK-NEXT:    br i1 [[TMP1067]], label %[[PRED_STORE_IF705:.*]], label %[[PRED_STORE_CONTINUE706:.*]]
+; CHECK:       [[PRED_STORE_IF705]]:
+; CHECK-NEXT:    [[TMP1068:%.*]] = add i64 [[INDEX]], 353
+; CHECK-NEXT:    [[TMP1069:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1068]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1069]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE706]]
+; CHECK:       [[PRED_STORE_CONTINUE706]]:
+; CHECK-NEXT:    [[TMP1070:%.*]] = extractelement <64 x i1> [[TMP5]], i32 34
+; CHECK-NEXT:    br i1 [[TMP1070]], label %[[PRED_STORE_IF707:.*]], label %[[PRED_STORE_CONTINUE708:.*]]
+; CHECK:       [[PRED_STORE_IF707]]:
+; CHECK-NEXT:    [[TMP1071:%.*]] = add i64 [[INDEX]], 354
+; CHECK-NEXT:    [[TMP1072:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1071]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1072]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE708]]
+; CHECK:       [[PRED_STORE_CONTINUE708]]:
+; CHECK-NEXT:    [[TMP1073:%.*]] = extractelement <64 x i1> [[TMP5]], i32 35
+; CHECK-NEXT:    br i1 [[TMP1073]], label %[[PRED_STORE_IF709:.*]], label %[[PRED_STORE_CONTINUE710:.*]]
+; CHECK:       [[PRED_STORE_IF709]]:
+; CHECK-NEXT:    [[TMP1074:%.*]] = add i64 [[INDEX]], 355
+; CHECK-NEXT:    [[TMP1075:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1074]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1075]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE710]]
+; CHECK:       [[PRED_STORE_CONTINUE710]]:
+; CHECK-NEXT:    [[TMP1076:%.*]] = extractelement <64 x i1> [[TMP5]], i32 36
+; CHECK-NEXT:    br i1 [[TMP1076]], label %[[PRED_STORE_IF711:.*]], label %[[PRED_STORE_CONTINUE712:.*]]
+; CHECK:       [[PRED_STORE_IF711]]:
+; CHECK-NEXT:    [[TMP1077:%.*]] = add i64 [[INDEX]], 356
+; CHECK-NEXT:    [[TMP1078:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1077]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1078]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE712]]
+; CHECK:       [[PRED_STORE_CONTINUE712]]:
+; CHECK-NEXT:    [[TMP1079:%.*]] = extractelement <64 x i1> [[TMP5]], i32 37
+; CHECK-NEXT:    br i1 [[TMP1079]], label %[[PRED_STORE_IF713:.*]], label %[[PRED_STORE_CONTINUE714:.*]]
+; CHECK:       [[PRED_STORE_IF713]]:
+; CHECK-NEXT:    [[TMP1080:%.*]] = add i64 [[INDEX]], 357
+; CHECK-NEXT:    [[TMP1081:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1080]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1081]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE714]]
+; CHECK:       [[PRED_STORE_CONTINUE714]]:
+; CHECK-NEXT:    [[TMP1082:%.*]] = extractelement <64 x i1> [[TMP5]], i32 38
+; CHECK-NEXT:    br i1 [[TMP1082]], label %[[PRED_STORE_IF715:.*]], label %[[PRED_STORE_CONTINUE716:.*]]
+; CHECK:       [[PRED_STORE_IF715]]:
+; CHECK-NEXT:    [[TMP1083:%.*]] = add i64 [[INDEX]], 358
+; CHECK-NEXT:    [[TMP1084:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1083]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1084]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE716]]
+; CHECK:       [[PRED_STORE_CONTINUE716]]:
+; CHECK-NEXT:    [[TMP1085:%.*]] = extractelement <64 x i1> [[TMP5]], i32 39
+; CHECK-NEXT:    br i1 [[TMP1085]], label %[[PRED_STORE_IF717:.*]], label %[[PRED_STORE_CONTINUE718:.*]]
+; CHECK:       [[PRED_STORE_IF717]]:
+; CHECK-NEXT:    [[TMP1086:%.*]] = add i64 [[INDEX]], 359
+; CHECK-NEXT:    [[TMP1087:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1086]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1087]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE718]]
+; CHECK:       [[PRED_STORE_CONTINUE718]]:
+; CHECK-NEXT:    [[TMP1088:%.*]] = extractelement <64 x i1> [[TMP5]], i32 40
+; CHECK-NEXT:    br i1 [[TMP1088]], label %[[PRED_STORE_IF719:.*]], label %[[PRED_STORE_CONTINUE720:.*]]
+; CHECK:       [[PRED_STORE_IF719]]:
+; CHECK-NEXT:    [[TMP1089:%.*]] = add i64 [[INDEX]], 360
+; CHECK-NEXT:    [[TMP1090:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1089]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1090]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE720]]
+; CHECK:       [[PRED_STORE_CONTINUE720]]:
+; CHECK-NEXT:    [[TMP1091:%.*]] = extractelement <64 x i1> [[TMP5]], i32 41
+; CHECK-NEXT:    br i1 [[TMP1091]], label %[[PRED_STORE_IF721:.*]], label %[[PRED_STORE_CONTINUE722:.*]]
+; CHECK:       [[PRED_STORE_IF721]]:
+; CHECK-NEXT:    [[TMP1092:%.*]] = add i64 [[INDEX]], 361
+; CHECK-NEXT:    [[TMP1093:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1092]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1093]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE722]]
+; CHECK:       [[PRED_STORE_CONTINUE722]]:
+; CHECK-NEXT:    [[TMP1094:%.*]] = extractelement <64 x i1> [[TMP5]], i32 42
+; CHECK-NEXT:    br i1 [[TMP1094]], label %[[PRED_STORE_IF723:.*]], label %[[PRED_STORE_CONTINUE724:.*]]
+; CHECK:       [[PRED_STORE_IF723]]:
+; CHECK-NEXT:    [[TMP1095:%.*]] = add i64 [[INDEX]], 362
+; CHECK-NEXT:    [[TMP1096:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1095]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1096]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE724]]
+; CHECK:       [[PRED_STORE_CONTINUE724]]:
+; CHECK-NEXT:    [[TMP1097:%.*]] = extractelement <64 x i1> [[TMP5]], i32 43
+; CHECK-NEXT:    br i1 [[TMP1097]], label %[[PRED_STORE_IF725:.*]], label %[[PRED_STORE_CONTINUE726:.*]]
+; CHECK:       [[PRED_STORE_IF725]]:
+; CHECK-NEXT:    [[TMP1098:%.*]] = add i64 [[INDEX]], 363
+; CHECK-NEXT:    [[TMP1099:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1098]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1099]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE726]]
+; CHECK:       [[PRED_STORE_CONTINUE726]]:
+; CHECK-NEXT:    [[TMP1100:%.*]] = extractelement <64 x i1> [[TMP5]], i32 44
+; CHECK-NEXT:    br i1 [[TMP1100]], label %[[PRED_STORE_IF727:.*]], label %[[PRED_STORE_CONTINUE728:.*]]
+; CHECK:       [[PRED_STORE_IF727]]:
+; CHECK-NEXT:    [[TMP1101:%.*]] = add i64 [[INDEX]], 364
+; CHECK-NEXT:    [[TMP1102:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1101]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1102]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE728]]
+; CHECK:       [[PRED_STORE_CONTINUE728]]:
+; CHECK-NEXT:    [[TMP1103:%.*]] = extractelement <64 x i1> [[TMP5]], i32 45
+; CHECK-NEXT:    br i1 [[TMP1103]], label %[[PRED_STORE_IF729:.*]], label %[[PRED_STORE_CONTINUE730:.*]]
+; CHECK:       [[PRED_STORE_IF729]]:
+; CHECK-NEXT:    [[TMP1104:%.*]] = add i64 [[INDEX]], 365
+; CHECK-NEXT:    [[TMP1105:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1104]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1105]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE730]]
+; CHECK:       [[PRED_STORE_CONTINUE730]]:
+; CHECK-NEXT:    [[TMP1106:%.*]] = extractelement <64 x i1> [[TMP5]], i32 46
+; CHECK-NEXT:    br i1 [[TMP1106]], label %[[PRED_STORE_IF731:.*]], label %[[PRED_STORE_CONTINUE732:.*]]
+; CHECK:       [[PRED_STORE_IF731]]:
+; CHECK-NEXT:    [[TMP1107:%.*]] = add i64 [[INDEX]], 366
+; CHECK-NEXT:    [[TMP1108:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1107]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1108]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE732]]
+; CHECK:       [[PRED_STORE_CONTINUE732]]:
+; CHECK-NEXT:    [[TMP1109:%.*]] = extractelement <64 x i1> [[TMP5]], i32 47
+; CHECK-NEXT:    br i1 [[TMP1109]], label %[[PRED_STORE_IF733:.*]], label %[[PRED_STORE_CONTINUE734:.*]]
+; CHECK:       [[PRED_STORE_IF733]]:
+; CHECK-NEXT:    [[TMP1110:%.*]] = add i64 [[INDEX]], 367
+; CHECK-NEXT:    [[TMP1111:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1110]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1111]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE734]]
+; CHECK:       [[PRED_STORE_CONTINUE734]]:
+; CHECK-NEXT:    [[TMP1112:%.*]] = extractelement <64 x i1> [[TMP5]], i32 48
+; CHECK-NEXT:    br i1 [[TMP1112]], label %[[PRED_STORE_IF735:.*]], label %[[PRED_STORE_CONTINUE736:.*]]
+; CHECK:       [[PRED_STORE_IF735]]:
+; CHECK-NEXT:    [[TMP1113:%.*]] = add i64 [[INDEX]], 368
+; CHECK-NEXT:    [[TMP1114:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1113]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1114]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE736]]
+; CHECK:       [[PRED_STORE_CONTINUE736]]:
+; CHECK-NEXT:    [[TMP1115:%.*]] = extractelement <64 x i1> [[TMP5]], i32 49
+; CHECK-NEXT:    br i1 [[TMP1115]], label %[[PRED_STORE_IF737:.*]], label %[[PRED_STORE_CONTINUE738:.*]]
+; CHECK:       [[PRED_STORE_IF737]]:
+; CHECK-NEXT:    [[TMP1116:%.*]] = add i64 [[INDEX]], 369
+; CHECK-NEXT:    [[TMP1117:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1116]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1117]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE738]]
+; CHECK:       [[PRED_STORE_CONTINUE738]]:
+; CHECK-NEXT:    [[TMP1118:%.*]] = extractelement <64 x i1> [[TMP5]], i32 50
+; CHECK-NEXT:    br i1 [[TMP1118]], label %[[PRED_STORE_IF739:.*]], label %[[PRED_STORE_CONTINUE740:.*]]
+; CHECK:       [[PRED_STORE_IF739]]:
+; CHECK-NEXT:    [[TMP1119:%.*]] = add i64 [[INDEX]], 370
+; CHECK-NEXT:    [[TMP1120:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1119]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1120]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE740]]
+; CHECK:       [[PRED_STORE_CONTINUE740]]:
+; CHECK-NEXT:    [[TMP1121:%.*]] = extractelement <64 x i1> [[TMP5]], i32 51
+; CHECK-NEXT:    br i1 [[TMP1121]], label %[[PRED_STORE_IF741:.*]], label %[[PRED_STORE_CONTINUE742:.*]]
+; CHECK:       [[PRED_STORE_IF741]]:
+; CHECK-NEXT:    [[TMP1122:%.*]] = add i64 [[INDEX]], 371
+; CHECK-NEXT:    [[TMP1123:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1122]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1123]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE742]]
+; CHECK:       [[PRED_STORE_CONTINUE742]]:
+; CHECK-NEXT:    [[TMP1124:%.*]] = extractelement <64 x i1> [[TMP5]], i32 52
+; CHECK-NEXT:    br i1 [[TMP1124]], label %[[PRED_STORE_IF743:.*]], label %[[PRED_STORE_CONTINUE744:.*]]
+; CHECK:       [[PRED_STORE_IF743]]:
+; CHECK-NEXT:    [[TMP1125:%.*]] = add i64 [[INDEX]], 372
+; CHECK-NEXT:    [[TMP1126:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1125]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1126]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE744]]
+; CHECK:       [[PRED_STORE_CONTINUE744]]:
+; CHECK-NEXT:    [[TMP1127:%.*]] = extractelement <64 x i1> [[TMP5]], i32 53
+; CHECK-NEXT:    br i1 [[TMP1127]], label %[[PRED_STORE_IF745:.*]], label %[[PRED_STORE_CONTINUE746:.*]]
+; CHECK:       [[PRED_STORE_IF745]]:
+; CHECK-NEXT:    [[TMP1128:%.*]] = add i64 [[INDEX]], 373
+; CHECK-NEXT:    [[TMP1129:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1128]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1129]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE746]]
+; CHECK:       [[PRED_STORE_CONTINUE746]]:
+; CHECK-NEXT:    [[TMP1130:%.*]] = extractelement <64 x i1> [[TMP5]], i32 54
+; CHECK-NEXT:    br i1 [[TMP1130]], label %[[PRED_STORE_IF747:.*]], label %[[PRED_STORE_CONTINUE748:.*]]
+; CHECK:       [[PRED_STORE_IF747]]:
+; CHECK-NEXT:    [[TMP1131:%.*]] = add i64 [[INDEX]], 374
+; CHECK-NEXT:    [[TMP1132:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1131]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1132]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE748]]
+; CHECK:       [[PRED_STORE_CONTINUE748]]:
+; CHECK-NEXT:    [[TMP1133:%.*]] = extractelement <64 x i1> [[TMP5]], i32 55
+; CHECK-NEXT:    br i1 [[TMP1133]], label %[[PRED_STORE_IF749:.*]], label %[[PRED_STORE_CONTINUE750:.*]]
+; CHECK:       [[PRED_STORE_IF749]]:
+; CHECK-NEXT:    [[TMP1134:%.*]] = add i64 [[INDEX]], 375
+; CHECK-NEXT:    [[TMP1135:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1134]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1135]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE750]]
+; CHECK:       [[PRED_STORE_CONTINUE750]]:
+; CHECK-NEXT:    [[TMP1136:%.*]] = extractelement <64 x i1> [[TMP5]], i32 56
+; CHECK-NEXT:    br i1 [[TMP1136]], label %[[PRED_STORE_IF751:.*]], label %[[PRED_STORE_CONTINUE752:.*]]
+; CHECK:       [[PRED_STORE_IF751]]:
+; CHECK-NEXT:    [[TMP1137:%.*]] = add i64 [[INDEX]], 376
+; CHECK-NEXT:    [[TMP1138:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1137]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1138]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE752]]
+; CHECK:       [[PRED_STORE_CONTINUE752]]:
+; CHECK-NEXT:    [[TMP1139:%.*]] = extractelement <64 x i1> [[TMP5]], i32 57
+; CHECK-NEXT:    br i1 [[TMP1139]], label %[[PRED_STORE_IF753:.*]], label %[[PRED_STORE_CONTINUE754:.*]]
+; CHECK:       [[PRED_STORE_IF753]]:
+; CHECK-NEXT:    [[TMP1140:%.*]] = add i64 [[INDEX]], 377
+; CHECK-NEXT:    [[TMP1141:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1140]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1141]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE754]]
+; CHECK:       [[PRED_STORE_CONTINUE754]]:
+; CHECK-NEXT:    [[TMP1142:%.*]] = extractelement <64 x i1> [[TMP5]], i32 58
+; CHECK-NEXT:    br i1 [[TMP1142]], label %[[PRED_STORE_IF755:.*]], label %[[PRED_STORE_CONTINUE756:.*]]
+; CHECK:       [[PRED_STORE_IF755]]:
+; CHECK-NEXT:    [[TMP1143:%.*]] = add i64 [[INDEX]], 378
+; CHECK-NEXT:    [[TMP1144:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1143]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1144]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE756]]
+; CHECK:       [[PRED_STORE_CONTINUE756]]:
+; CHECK-NEXT:    [[TMP1145:%.*]] = extractelement <64 x i1> [[TMP5]], i32 59
+; CHECK-NEXT:    br i1 [[TMP1145]], label %[[PRED_STORE_IF757:.*]], label %[[PRED_STORE_CONTINUE758:.*]]
+; CHECK:       [[PRED_STORE_IF757]]:
+; CHECK-NEXT:    [[TMP1146:%.*]] = add i64 [[INDEX]], 379
+; CHECK-NEXT:    [[TMP1147:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1146]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1147]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE758]]
+; CHECK:       [[PRED_STORE_CONTINUE758]]:
+; CHECK-NEXT:    [[TMP1148:%.*]] = extractelement <64 x i1> [[TMP5]], i32 60
+; CHECK-NEXT:    br i1 [[TMP1148]], label %[[PRED_STORE_IF759:.*]], label %[[PRED_STORE_CONTINUE760:.*]]
+; CHECK:       [[PRED_STORE_IF759]]:
+; CHECK-NEXT:    [[TMP1149:%.*]] = add i64 [[INDEX]], 380
+; CHECK-NEXT:    [[TMP1150:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1149]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1150]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE760]]
+; CHECK:       [[PRED_STORE_CONTINUE760]]:
+; CHECK-NEXT:    [[TMP1151:%.*]] = extractelement <64 x i1> [[TMP5]], i32 61
+; CHECK-NEXT:    br i1 [[TMP1151]], label %[[PRED_STORE_IF761:.*]], label %[[PRED_STORE_CONTINUE762:.*]]
+; CHECK:       [[PRED_STORE_IF761]]:
+; CHECK-NEXT:    [[TMP1152:%.*]] = add i64 [[INDEX]], 381
+; CHECK-NEXT:    [[TMP1153:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1152]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1153]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE762]]
+; CHECK:       [[PRED_STORE_CONTINUE762]]:
+; CHECK-NEXT:    [[TMP1154:%.*]] = extractelement <64 x i1> [[TMP5]], i32 62
+; CHECK-NEXT:    br i1 [[TMP1154]], label %[[PRED_STORE_IF763:.*]], label %[[PRED_STORE_CONTINUE764:.*]]
+; CHECK:       [[PRED_STORE_IF763]]:
+; CHECK-NEXT:    [[TMP1155:%.*]] = add i64 [[INDEX]], 382
+; CHECK-NEXT:    [[TMP1156:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1155]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1156]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE764]]
+; CHECK:       [[PRED_STORE_CONTINUE764]]:
+; CHECK-NEXT:    [[TMP1157:%.*]] = extractelement <64 x i1> [[TMP5]], i32 63
+; CHECK-NEXT:    br i1 [[TMP1157]], label %[[PRED_STORE_IF765:.*]], label %[[PRED_STORE_CONTINUE766:.*]]
+; CHECK:       [[PRED_STORE_IF765]]:
+; CHECK-NEXT:    [[TMP1158:%.*]] = add i64 [[INDEX]], 383
+; CHECK-NEXT:    [[TMP1159:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1158]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1159]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE766]]
+; CHECK:       [[PRED_STORE_CONTINUE766]]:
+; CHECK-NEXT:    [[TMP1160:%.*]] = extractelement <64 x i1> [[TMP6]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1160]], label %[[PRED_STORE_IF767:.*]], label %[[PRED_STORE_CONTINUE768:.*]]
+; CHECK:       [[PRED_STORE_IF767]]:
+; CHECK-NEXT:    [[TMP1161:%.*]] = add i64 [[INDEX]], 384
+; CHECK-NEXT:    [[TMP1162:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1161]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1162]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE768]]
+; CHECK:       [[PRED_STORE_CONTINUE768]]:
+; CHECK-NEXT:    [[TMP1163:%.*]] = extractelement <64 x i1> [[TMP6]], i32 1
+; CHECK-NEXT:    br i1 [[TMP1163]], label %[[PRED_STORE_IF769:.*]], label %[[PRED_STORE_CONTINUE770:.*]]
+; CHECK:       [[PRED_STORE_IF769]]:
+; CHECK-NEXT:    [[TMP1164:%.*]] = add i64 [[INDEX]], 385
+; CHECK-NEXT:    [[TMP1165:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1164]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1165]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE770]]
+; CHECK:       [[PRED_STORE_CONTINUE770]]:
+; CHECK-NEXT:    [[TMP1166:%.*]] = extractelement <64 x i1> [[TMP6]], i32 2
+; CHECK-NEXT:    br i1 [[TMP1166]], label %[[PRED_STORE_IF771:.*]], label %[[PRED_STORE_CONTINUE772:.*]]
+; CHECK:       [[PRED_STORE_IF771]]:
+; CHECK-NEXT:    [[TMP1167:%.*]] = add i64 [[INDEX]], 386
+; CHECK-NEXT:    [[TMP1168:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1167]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1168]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE772]]
+; CHECK:       [[PRED_STORE_CONTINUE772]]:
+; CHECK-NEXT:    [[TMP1169:%.*]] = extractelement <64 x i1> [[TMP6]], i32 3
+; CHECK-NEXT:    br i1 [[TMP1169]], label %[[PRED_STORE_IF773:.*]], label %[[PRED_STORE_CONTINUE774:.*]]
+; CHECK:       [[PRED_STORE_IF773]]:
+; CHECK-NEXT:    [[TMP1170:%.*]] = add i64 [[INDEX]], 387
+; CHECK-NEXT:    [[TMP1171:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1170]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1171]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE774]]
+; CHECK:       [[PRED_STORE_CONTINUE774]]:
+; CHECK-NEXT:    [[TMP1172:%.*]] = extractelement <64 x i1> [[TMP6]], i32 4
+; CHECK-NEXT:    br i1 [[TMP1172]], label %[[PRED_STORE_IF775:.*]], label %[[PRED_STORE_CONTINUE776:.*]]
+; CHECK:       [[PRED_STORE_IF775]]:
+; CHECK-NEXT:    [[TMP1173:%.*]] = add i64 [[INDEX]], 388
+; CHECK-NEXT:    [[TMP1174:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1173]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1174]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE776]]
+; CHECK:       [[PRED_STORE_CONTINUE776]]:
+; CHECK-NEXT:    [[TMP1175:%.*]] = extractelement <64 x i1> [[TMP6]], i32 5
+; CHECK-NEXT:    br i1 [[TMP1175]], label %[[PRED_STORE_IF777:.*]], label %[[PRED_STORE_CONTINUE778:.*]]
+; CHECK:       [[PRED_STORE_IF777]]:
+; CHECK-NEXT:    [[TMP1176:%.*]] = add i64 [[INDEX]], 389
+; CHECK-NEXT:    [[TMP1177:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1176]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1177]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE778]]
+; CHECK:       [[PRED_STORE_CONTINUE778]]:
+; CHECK-NEXT:    [[TMP1178:%.*]] = extractelement <64 x i1> [[TMP6]], i32 6
+; CHECK-NEXT:    br i1 [[TMP1178]], label %[[PRED_STORE_IF779:.*]], label %[[PRED_STORE_CONTINUE780:.*]]
+; CHECK:       [[PRED_STORE_IF779]]:
+; CHECK-NEXT:    [[TMP1179:%.*]] = add i64 [[INDEX]], 390
+; CHECK-NEXT:    [[TMP1180:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1179]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1180]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE780]]
+; CHECK:       [[PRED_STORE_CONTINUE780]]:
+; CHECK-NEXT:    [[TMP1181:%.*]] = extractelement <64 x i1> [[TMP6]], i32 7
+; CHECK-NEXT:    br i1 [[TMP1181]], label %[[PRED_STORE_IF781:.*]], label %[[PRED_STORE_CONTINUE782:.*]]
+; CHECK:       [[PRED_STORE_IF781]]:
+; CHECK-NEXT:    [[TMP1182:%.*]] = add i64 [[INDEX]], 391
+; CHECK-NEXT:    [[TMP1183:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1182]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1183]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE782]]
+; CHECK:       [[PRED_STORE_CONTINUE782]]:
+; CHECK-NEXT:    [[TMP1184:%.*]] = extractelement <64 x i1> [[TMP6]], i32 8
+; CHECK-NEXT:    br i1 [[TMP1184]], label %[[PRED_STORE_IF783:.*]], label %[[PRED_STORE_CONTINUE784:.*]]
+; CHECK:       [[PRED_STORE_IF783]]:
+; CHECK-NEXT:    [[TMP1185:%.*]] = add i64 [[INDEX]], 392
+; CHECK-NEXT:    [[TMP1186:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1185]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1186]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE784]]
+; CHECK:       [[PRED_STORE_CONTINUE784]]:
+; CHECK-NEXT:    [[TMP1187:%.*]] = extractelement <64 x i1> [[TMP6]], i32 9
+; CHECK-NEXT:    br i1 [[TMP1187]], label %[[PRED_STORE_IF785:.*]], label %[[PRED_STORE_CONTINUE786:.*]]
+; CHECK:       [[PRED_STORE_IF785]]:
+; CHECK-NEXT:    [[TMP1188:%.*]] = add i64 [[INDEX]], 393
+; CHECK-NEXT:    [[TMP1189:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1188]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1189]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE786]]
+; CHECK:       [[PRED_STORE_CONTINUE786]]:
+; CHECK-NEXT:    [[TMP1190:%.*]] = extractelement <64 x i1> [[TMP6]], i32 10
+; CHECK-NEXT:    br i1 [[TMP1190]], label %[[PRED_STORE_IF787:.*]], label %[[PRED_STORE_CONTINUE788:.*]]
+; CHECK:       [[PRED_STORE_IF787]]:
+; CHECK-NEXT:    [[TMP1191:%.*]] = add i64 [[INDEX]], 394
+; CHECK-NEXT:    [[TMP1192:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1191]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1192]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE788]]
+; CHECK:       [[PRED_STORE_CONTINUE788]]:
+; CHECK-NEXT:    [[TMP1193:%.*]] = extractelement <64 x i1> [[TMP6]], i32 11
+; CHECK-NEXT:    br i1 [[TMP1193]], label %[[PRED_STORE_IF789:.*]], label %[[PRED_STORE_CONTINUE790:.*]]
+; CHECK:       [[PRED_STORE_IF789]]:
+; CHECK-NEXT:    [[TMP1194:%.*]] = add i64 [[INDEX]], 395
+; CHECK-NEXT:    [[TMP1195:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1194]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1195]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE790]]
+; CHECK:       [[PRED_STORE_CONTINUE790]]:
+; CHECK-NEXT:    [[TMP1196:%.*]] = extractelement <64 x i1> [[TMP6]], i32 12
+; CHECK-NEXT:    br i1 [[TMP1196]], label %[[PRED_STORE_IF791:.*]], label %[[PRED_STORE_CONTINUE792:.*]]
+; CHECK:       [[PRED_STORE_IF791]]:
+; CHECK-NEXT:    [[TMP1197:%.*]] = add i64 [[INDEX]], 396
+; CHECK-NEXT:    [[TMP1198:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1197]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1198]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE792]]
+; CHECK:       [[PRED_STORE_CONTINUE792]]:
+; CHECK-NEXT:    [[TMP1199:%.*]] = extractelement <64 x i1> [[TMP6]], i32 13
+; CHECK-NEXT:    br i1 [[TMP1199]], label %[[PRED_STORE_IF793:.*]], label %[[PRED_STORE_CONTINUE794:.*]]
+; CHECK:       [[PRED_STORE_IF793]]:
+; CHECK-NEXT:    [[TMP1200:%.*]] = add i64 [[INDEX]], 397
+; CHECK-NEXT:    [[TMP1201:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1200]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1201]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE794]]
+; CHECK:       [[PRED_STORE_CONTINUE794]]:
+; CHECK-NEXT:    [[TMP1202:%.*]] = extractelement <64 x i1> [[TMP6]], i32 14
+; CHECK-NEXT:    br i1 [[TMP1202]], label %[[PRED_STORE_IF795:.*]], label %[[PRED_STORE_CONTINUE796:.*]]
+; CHECK:       [[PRED_STORE_IF795]]:
+; CHECK-NEXT:    [[TMP1203:%.*]] = add i64 [[INDEX]], 398
+; CHECK-NEXT:    [[TMP1204:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1203]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1204]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE796]]
+; CHECK:       [[PRED_STORE_CONTINUE796]]:
+; CHECK-NEXT:    [[TMP1205:%.*]] = extractelement <64 x i1> [[TMP6]], i32 15
+; CHECK-NEXT:    br i1 [[TMP1205]], label %[[PRED_STORE_IF797:.*]], label %[[PRED_STORE_CONTINUE798:.*]]
+; CHECK:       [[PRED_STORE_IF797]]:
+; CHECK-NEXT:    [[TMP1206:%.*]] = add i64 [[INDEX]], 399
+; CHECK-NEXT:    [[TMP1207:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1206]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1207]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE798]]
+; CHECK:       [[PRED_STORE_CONTINUE798]]:
+; CHECK-NEXT:    [[TMP1208:%.*]] = extractelement <64 x i1> [[TMP6]], i32 16
+; CHECK-NEXT:    br i1 [[TMP1208]], label %[[PRED_STORE_IF799:.*]], label %[[PRED_STORE_CONTINUE800:.*]]
+; CHECK:       [[PRED_STORE_IF799]]:
+; CHECK-NEXT:    [[TMP1209:%.*]] = add i64 [[INDEX]], 400
+; CHECK-NEXT:    [[TMP1210:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1209]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1210]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE800]]
+; CHECK:       [[PRED_STORE_CONTINUE800]]:
+; CHECK-NEXT:    [[TMP1211:%.*]] = extractelement <64 x i1> [[TMP6]], i32 17
+; CHECK-NEXT:    br i1 [[TMP1211]], label %[[PRED_STORE_IF801:.*]], label %[[PRED_STORE_CONTINUE802:.*]]
+; CHECK:       [[PRED_STORE_IF801]]:
+; CHECK-NEXT:    [[TMP1212:%.*]] = add i64 [[INDEX]], 401
+; CHECK-NEXT:    [[TMP1213:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1212]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1213]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE802]]
+; CHECK:       [[PRED_STORE_CONTINUE802]]:
+; CHECK-NEXT:    [[TMP1214:%.*]] = extractelement <64 x i1> [[TMP6]], i32 18
+; CHECK-NEXT:    br i1 [[TMP1214]], label %[[PRED_STORE_IF803:.*]], label %[[PRED_STORE_CONTINUE804:.*]]
+; CHECK:       [[PRED_STORE_IF803]]:
+; CHECK-NEXT:    [[TMP1215:%.*]] = add i64 [[INDEX]], 402
+; CHECK-NEXT:    [[TMP1216:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1215]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1216]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE804]]
+; CHECK:       [[PRED_STORE_CONTINUE804]]:
+; CHECK-NEXT:    [[TMP1217:%.*]] = extractelement <64 x i1> [[TMP6]], i32 19
+; CHECK-NEXT:    br i1 [[TMP1217]], label %[[PRED_STORE_IF805:.*]], label %[[PRED_STORE_CONTINUE806:.*]]
+; CHECK:       [[PRED_STORE_IF805]]:
+; CHECK-NEXT:    [[TMP1218:%.*]] = add i64 [[INDEX]], 403
+; CHECK-NEXT:    [[TMP1219:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1218]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1219]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE806]]
+; CHECK:       [[PRED_STORE_CONTINUE806]]:
+; CHECK-NEXT:    [[TMP1220:%.*]] = extractelement <64 x i1> [[TMP6]], i32 20
+; CHECK-NEXT:    br i1 [[TMP1220]], label %[[PRED_STORE_IF807:.*]], label %[[PRED_STORE_CONTINUE808:.*]]
+; CHECK:       [[PRED_STORE_IF807]]:
+; CHECK-NEXT:    [[TMP1221:%.*]] = add i64 [[INDEX]], 404
+; CHECK-NEXT:    [[TMP1222:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1221]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1222]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE808]]
+; CHECK:       [[PRED_STORE_CONTINUE808]]:
+; CHECK-NEXT:    [[TMP1223:%.*]] = extractelement <64 x i1> [[TMP6]], i32 21
+; CHECK-NEXT:    br i1 [[TMP1223]], label %[[PRED_STORE_IF809:.*]], label %[[PRED_STORE_CONTINUE810:.*]]
+; CHECK:       [[PRED_STORE_IF809]]:
+; CHECK-NEXT:    [[TMP1224:%.*]] = add i64 [[INDEX]], 405
+; CHECK-NEXT:    [[TMP1225:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1224]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1225]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE810]]
+; CHECK:       [[PRED_STORE_CONTINUE810]]:
+; CHECK-NEXT:    [[TMP1226:%.*]] = extractelement <64 x i1> [[TMP6]], i32 22
+; CHECK-NEXT:    br i1 [[TMP1226]], label %[[PRED_STORE_IF811:.*]], label %[[PRED_STORE_CONTINUE812:.*]]
+; CHECK:       [[PRED_STORE_IF811]]:
+; CHECK-NEXT:    [[TMP1227:%.*]] = add i64 [[INDEX]], 406
+; CHECK-NEXT:    [[TMP1228:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1227]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1228]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE812]]
+; CHECK:       [[PRED_STORE_CONTINUE812]]:
+; CHECK-NEXT:    [[TMP1229:%.*]] = extractelement <64 x i1> [[TMP6]], i32 23
+; CHECK-NEXT:    br i1 [[TMP1229]], label %[[PRED_STORE_IF813:.*]], label %[[PRED_STORE_CONTINUE814:.*]]
+; CHECK:       [[PRED_STORE_IF813]]:
+; CHECK-NEXT:    [[TMP1230:%.*]] = add i64 [[INDEX]], 407
+; CHECK-NEXT:    [[TMP1231:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1230]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1231]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE814]]
+; CHECK:       [[PRED_STORE_CONTINUE814]]:
+; CHECK-NEXT:    [[TMP1232:%.*]] = extractelement <64 x i1> [[TMP6]], i32 24
+; CHECK-NEXT:    br i1 [[TMP1232]], label %[[PRED_STORE_IF815:.*]], label %[[PRED_STORE_CONTINUE816:.*]]
+; CHECK:       [[PRED_STORE_IF815]]:
+; CHECK-NEXT:    [[TMP1233:%.*]] = add i64 [[INDEX]], 408
+; CHECK-NEXT:    [[TMP1234:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1233]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1234]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE816]]
+; CHECK:       [[PRED_STORE_CONTINUE816]]:
+; CHECK-NEXT:    [[TMP1235:%.*]] = extractelement <64 x i1> [[TMP6]], i32 25
+; CHECK-NEXT:    br i1 [[TMP1235]], label %[[PRED_STORE_IF817:.*]], label %[[PRED_STORE_CONTINUE818:.*]]
+; CHECK:       [[PRED_STORE_IF817]]:
+; CHECK-NEXT:    [[TMP1236:%.*]] = add i64 [[INDEX]], 409
+; CHECK-NEXT:    [[TMP1237:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1236]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1237]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE818]]
+; CHECK:       [[PRED_STORE_CONTINUE818]]:
+; CHECK-NEXT:    [[TMP1238:%.*]] = extractelement <64 x i1> [[TMP6]], i32 26
+; CHECK-NEXT:    br i1 [[TMP1238]], label %[[PRED_STORE_IF819:.*]], label %[[PRED_STORE_CONTINUE820:.*]]
+; CHECK:       [[PRED_STORE_IF819]]:
+; CHECK-NEXT:    [[TMP1239:%.*]] = add i64 [[INDEX]], 410
+; CHECK-NEXT:    [[TMP1240:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1239]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1240]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE820]]
+; CHECK:       [[PRED_STORE_CONTINUE820]]:
+; CHECK-NEXT:    [[TMP1241:%.*]] = extractelement <64 x i1> [[TMP6]], i32 27
+; CHECK-NEXT:    br i1 [[TMP1241]], label %[[PRED_STORE_IF821:.*]], label %[[PRED_STORE_CONTINUE822:.*]]
+; CHECK:       [[PRED_STORE_IF821]]:
+; CHECK-NEXT:    [[TMP1242:%.*]] = add i64 [[INDEX]], 411
+; CHECK-NEXT:    [[TMP1243:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1242]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1243]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE822]]
+; CHECK:       [[PRED_STORE_CONTINUE822]]:
+; CHECK-NEXT:    [[TMP1244:%.*]] = extractelement <64 x i1> [[TMP6]], i32 28
+; CHECK-NEXT:    br i1 [[TMP1244]], label %[[PRED_STORE_IF823:.*]], label %[[PRED_STORE_CONTINUE824:.*]]
+; CHECK:       [[PRED_STORE_IF823]]:
+; CHECK-NEXT:    [[TMP1245:%.*]] = add i64 [[INDEX]], 412
+; CHECK-NEXT:    [[TMP1246:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1245]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1246]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE824]]
+; CHECK:       [[PRED_STORE_CONTINUE824]]:
+; CHECK-NEXT:    [[TMP1247:%.*]] = extractelement <64 x i1> [[TMP6]], i32 29
+; CHECK-NEXT:    br i1 [[TMP1247]], label %[[PRED_STORE_IF825:.*]], label %[[PRED_STORE_CONTINUE826:.*]]
+; CHECK:       [[PRED_STORE_IF825]]:
+; CHECK-NEXT:    [[TMP1248:%.*]] = add i64 [[INDEX]], 413
+; CHECK-NEXT:    [[TMP1249:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1248]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1249]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE826]]
+; CHECK:       [[PRED_STORE_CONTINUE826]]:
+; CHECK-NEXT:    [[TMP1250:%.*]] = extractelement <64 x i1> [[TMP6]], i32 30
+; CHECK-NEXT:    br i1 [[TMP1250]], label %[[PRED_STORE_IF827:.*]], label %[[PRED_STORE_CONTINUE828:.*]]
+; CHECK:       [[PRED_STORE_IF827]]:
+; CHECK-NEXT:    [[TMP1251:%.*]] = add i64 [[INDEX]], 414
+; CHECK-NEXT:    [[TMP1252:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1251]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1252]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE828]]
+; CHECK:       [[PRED_STORE_CONTINUE828]]:
+; CHECK-NEXT:    [[TMP1253:%.*]] = extractelement <64 x i1> [[TMP6]], i32 31
+; CHECK-NEXT:    br i1 [[TMP1253]], label %[[PRED_STORE_IF829:.*]], label %[[PRED_STORE_CONTINUE830:.*]]
+; CHECK:       [[PRED_STORE_IF829]]:
+; CHECK-NEXT:    [[TMP1254:%.*]] = add i64 [[INDEX]], 415
+; CHECK-NEXT:    [[TMP1255:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1254]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1255]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE830]]
+; CHECK:       [[PRED_STORE_CONTINUE830]]:
+; CHECK-NEXT:    [[TMP1256:%.*]] = extractelement <64 x i1> [[TMP6]], i32 32
+; CHECK-NEXT:    br i1 [[TMP1256]], label %[[PRED_STORE_IF831:.*]], label %[[PRED_STORE_CONTINUE832:.*]]
+; CHECK:       [[PRED_STORE_IF831]]:
+; CHECK-NEXT:    [[TMP1257:%.*]] = add i64 [[INDEX]], 416
+; CHECK-NEXT:    [[TMP1258:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1257]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1258]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE832]]
+; CHECK:       [[PRED_STORE_CONTINUE832]]:
+; CHECK-NEXT:    [[TMP1259:%.*]] = extractelement <64 x i1> [[TMP6]], i32 33
+; CHECK-NEXT:    br i1 [[TMP1259]], label %[[PRED_STORE_IF833:.*]], label %[[PRED_STORE_CONTINUE834:.*]]
+; CHECK:       [[PRED_STORE_IF833]]:
+; CHECK-NEXT:    [[TMP1260:%.*]] = add i64 [[INDEX]], 417
+; CHECK-NEXT:    [[TMP1261:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1260]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1261]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE834]]
+; CHECK:       [[PRED_STORE_CONTINUE834]]:
+; CHECK-NEXT:    [[TMP1262:%.*]] = extractelement <64 x i1> [[TMP6]], i32 34
+; CHECK-NEXT:    br i1 [[TMP1262]], label %[[PRED_STORE_IF835:.*]], label %[[PRED_STORE_CONTINUE836:.*]]
+; CHECK:       [[PRED_STORE_IF835]]:
+; CHECK-NEXT:    [[TMP1263:%.*]] = add i64 [[INDEX]], 418
+; CHECK-NEXT:    [[TMP1264:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1263]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1264]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE836]]
+; CHECK:       [[PRED_STORE_CONTINUE836]]:
+; CHECK-NEXT:    [[TMP1265:%.*]] = extractelement <64 x i1> [[TMP6]], i32 35
+; CHECK-NEXT:    br i1 [[TMP1265]], label %[[PRED_STORE_IF837:.*]], label %[[PRED_STORE_CONTINUE838:.*]]
+; CHECK:       [[PRED_STORE_IF837]]:
+; CHECK-NEXT:    [[TMP1266:%.*]] = add i64 [[INDEX]], 419
+; CHECK-NEXT:    [[TMP1267:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1266]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1267]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE838]]
+; CHECK:       [[PRED_STORE_CONTINUE838]]:
+; CHECK-NEXT:    [[TMP1268:%.*]] = extractelement <64 x i1> [[TMP6]], i32 36
+; CHECK-NEXT:    br i1 [[TMP1268]], label %[[PRED_STORE_IF839:.*]], label %[[PRED_STORE_CONTINUE840:.*]]
+; CHECK:       [[PRED_STORE_IF839]]:
+; CHECK-NEXT:    [[TMP1269:%.*]] = add i64 [[INDEX]], 420
+; CHECK-NEXT:    [[TMP1270:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1269]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1270]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE840]]
+; CHECK:       [[PRED_STORE_CONTINUE840]]:
+; CHECK-NEXT:    [[TMP1271:%.*]] = extractelement <64 x i1> [[TMP6]], i32 37
+; CHECK-NEXT:    br i1 [[TMP1271]], label %[[PRED_STORE_IF841:.*]], label %[[PRED_STORE_CONTINUE842:.*]]
+; CHECK:       [[PRED_STORE_IF841]]:
+; CHECK-NEXT:    [[TMP1272:%.*]] = add i64 [[INDEX]], 421
+; CHECK-NEXT:    [[TMP1273:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1272]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1273]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE842]]
+; CHECK:       [[PRED_STORE_CONTINUE842]]:
+; CHECK-NEXT:    [[TMP1274:%.*]] = extractelement <64 x i1> [[TMP6]], i32 38
+; CHECK-NEXT:    br i1 [[TMP1274]], label %[[PRED_STORE_IF843:.*]], label %[[PRED_STORE_CONTINUE844:.*]]
+; CHECK:       [[PRED_STORE_IF843]]:
+; CHECK-NEXT:    [[TMP1275:%.*]] = add i64 [[INDEX]], 422
+; CHECK-NEXT:    [[TMP1276:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1275]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1276]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE844]]
+; CHECK:       [[PRED_STORE_CONTINUE844]]:
+; CHECK-NEXT:    [[TMP1277:%.*]] = extractelement <64 x i1> [[TMP6]], i32 39
+; CHECK-NEXT:    br i1 [[TMP1277]], label %[[PRED_STORE_IF845:.*]], label %[[PRED_STORE_CONTINUE846:.*]]
+; CHECK:       [[PRED_STORE_IF845]]:
+; CHECK-NEXT:    [[TMP1278:%.*]] = add i64 [[INDEX]], 423
+; CHECK-NEXT:    [[TMP1279:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1278]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1279]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE846]]
+; CHECK:       [[PRED_STORE_CONTINUE846]]:
+; CHECK-NEXT:    [[TMP1280:%.*]] = extractelement <64 x i1> [[TMP6]], i32 40
+; CHECK-NEXT:    br i1 [[TMP1280]], label %[[PRED_STORE_IF847:.*]], label %[[PRED_STORE_CONTINUE848:.*]]
+; CHECK:       [[PRED_STORE_IF847]]:
+; CHECK-NEXT:    [[TMP1281:%.*]] = add i64 [[INDEX]], 424
+; CHECK-NEXT:    [[TMP1282:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1281]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1282]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE848]]
+; CHECK:       [[PRED_STORE_CONTINUE848]]:
+; CHECK-NEXT:    [[TMP1283:%.*]] = extractelement <64 x i1> [[TMP6]], i32 41
+; CHECK-NEXT:    br i1 [[TMP1283]], label %[[PRED_STORE_IF849:.*]], label %[[PRED_STORE_CONTINUE850:.*]]
+; CHECK:       [[PRED_STORE_IF849]]:
+; CHECK-NEXT:    [[TMP1284:%.*]] = add i64 [[INDEX]], 425
+; CHECK-NEXT:    [[TMP1285:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1284]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1285]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE850]]
+; CHECK:       [[PRED_STORE_CONTINUE850]]:
+; CHECK-NEXT:    [[TMP1286:%.*]] = extractelement <64 x i1> [[TMP6]], i32 42
+; CHECK-NEXT:    br i1 [[TMP1286]], label %[[PRED_STORE_IF851:.*]], label %[[PRED_STORE_CONTINUE852:.*]]
+; CHECK:       [[PRED_STORE_IF851]]:
+; CHECK-NEXT:    [[TMP1287:%.*]] = add i64 [[INDEX]], 426
+; CHECK-NEXT:    [[TMP1288:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1287]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1288]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE852]]
+; CHECK:       [[PRED_STORE_CONTINUE852]]:
+; CHECK-NEXT:    [[TMP1289:%.*]] = extractelement <64 x i1> [[TMP6]], i32 43
+; CHECK-NEXT:    br i1 [[TMP1289]], label %[[PRED_STORE_IF853:.*]], label %[[PRED_STORE_CONTINUE854:.*]]
+; CHECK:       [[PRED_STORE_IF853]]:
+; CHECK-NEXT:    [[TMP1290:%.*]] = add i64 [[INDEX]], 427
+; CHECK-NEXT:    [[TMP1291:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1290]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1291]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE854]]
+; CHECK:       [[PRED_STORE_CONTINUE854]]:
+; CHECK-NEXT:    [[TMP1292:%.*]] = extractelement <64 x i1> [[TMP6]], i32 44
+; CHECK-NEXT:    br i1 [[TMP1292]], label %[[PRED_STORE_IF855:.*]], label %[[PRED_STORE_CONTINUE856:.*]]
+; CHECK:       [[PRED_STORE_IF855]]:
+; CHECK-NEXT:    [[TMP1293:%.*]] = add i64 [[INDEX]], 428
+; CHECK-NEXT:    [[TMP1294:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1293]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1294]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE856]]
+; CHECK:       [[PRED_STORE_CONTINUE856]]:
+; CHECK-NEXT:    [[TMP1295:%.*]] = extractelement <64 x i1> [[TMP6]], i32 45
+; CHECK-NEXT:    br i1 [[TMP1295]], label %[[PRED_STORE_IF857:.*]], label %[[PRED_STORE_CONTINUE858:.*]]
+; CHECK:       [[PRED_STORE_IF857]]:
+; CHECK-NEXT:    [[TMP1296:%.*]] = add i64 [[INDEX]], 429
+; CHECK-NEXT:    [[TMP1297:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1296]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1297]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE858]]
+; CHECK:       [[PRED_STORE_CONTINUE858]]:
+; CHECK-NEXT:    [[TMP1298:%.*]] = extractelement <64 x i1> [[TMP6]], i32 46
+; CHECK-NEXT:    br i1 [[TMP1298]], label %[[PRED_STORE_IF859:.*]], label %[[PRED_STORE_CONTINUE860:.*]]
+; CHECK:       [[PRED_STORE_IF859]]:
+; CHECK-NEXT:    [[TMP1299:%.*]] = add i64 [[INDEX]], 430
+; CHECK-NEXT:    [[TMP1300:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1299]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1300]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE860]]
+; CHECK:       [[PRED_STORE_CONTINUE860]]:
+; CHECK-NEXT:    [[TMP1301:%.*]] = extractelement <64 x i1> [[TMP6]], i32 47
+; CHECK-NEXT:    br i1 [[TMP1301]], label %[[PRED_STORE_IF861:.*]], label %[[PRED_STORE_CONTINUE862:.*]]
+; CHECK:       [[PRED_STORE_IF861]]:
+; CHECK-NEXT:    [[TMP1302:%.*]] = add i64 [[INDEX]], 431
+; CHECK-NEXT:    [[TMP1303:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1302]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1303]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE862]]
+; CHECK:       [[PRED_STORE_CONTINUE862]]:
+; CHECK-NEXT:    [[TMP1304:%.*]] = extractelement <64 x i1> [[TMP6]], i32 48
+; CHECK-NEXT:    br i1 [[TMP1304]], label %[[PRED_STORE_IF863:.*]], label %[[PRED_STORE_CONTINUE864:.*]]
+; CHECK:       [[PRED_STORE_IF863]]:
+; CHECK-NEXT:    [[TMP1305:%.*]] = add i64 [[INDEX]], 432
+; CHECK-NEXT:    [[TMP1306:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1305]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1306]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE864]]
+; CHECK:       [[PRED_STORE_CONTINUE864]]:
+; CHECK-NEXT:    [[TMP1307:%.*]] = extractelement <64 x i1> [[TMP6]], i32 49
+; CHECK-NEXT:    br i1 [[TMP1307]], label %[[PRED_STORE_IF865:.*]], label %[[PRED_STORE_CONTINUE866:.*]]
+; CHECK:       [[PRED_STORE_IF865]]:
+; CHECK-NEXT:    [[TMP1308:%.*]] = add i64 [[INDEX]], 433
+; CHECK-NEXT:    [[TMP1309:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1308]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1309]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE866]]
+; CHECK:       [[PRED_STORE_CONTINUE866]]:
+; CHECK-NEXT:    [[TMP1310:%.*]] = extractelement <64 x i1> [[TMP6]], i32 50
+; CHECK-NEXT:    br i1 [[TMP1310]], label %[[PRED_STORE_IF867:.*]], label %[[PRED_STORE_CONTINUE868:.*]]
+; CHECK:       [[PRED_STORE_IF867]]:
+; CHECK-NEXT:    [[TMP1311:%.*]] = add i64 [[INDEX]], 434
+; CHECK-NEXT:    [[TMP1312:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1311]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1312]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE868]]
+; CHECK:       [[PRED_STORE_CONTINUE868]]:
+; CHECK-NEXT:    [[TMP1313:%.*]] = extractelement <64 x i1> [[TMP6]], i32 51
+; CHECK-NEXT:    br i1 [[TMP1313]], label %[[PRED_STORE_IF869:.*]], label %[[PRED_STORE_CONTINUE870:.*]]
+; CHECK:       [[PRED_STORE_IF869]]:
+; CHECK-NEXT:    [[TMP1314:%.*]] = add i64 [[INDEX]], 435
+; CHECK-NEXT:    [[TMP1315:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1314]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1315]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE870]]
+; CHECK:       [[PRED_STORE_CONTINUE870]]:
+; CHECK-NEXT:    [[TMP1316:%.*]] = extractelement <64 x i1> [[TMP6]], i32 52
+; CHECK-NEXT:    br i1 [[TMP1316]], label %[[PRED_STORE_IF871:.*]], label %[[PRED_STORE_CONTINUE872:.*]]
+; CHECK:       [[PRED_STORE_IF871]]:
+; CHECK-NEXT:    [[TMP1317:%.*]] = add i64 [[INDEX]], 436
+; CHECK-NEXT:    [[TMP1318:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1317]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1318]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE872]]
+; CHECK:       [[PRED_STORE_CONTINUE872]]:
+; CHECK-NEXT:    [[TMP1319:%.*]] = extractelement <64 x i1> [[TMP6]], i32 53
+; CHECK-NEXT:    br i1 [[TMP1319]], label %[[PRED_STORE_IF873:.*]], label %[[PRED_STORE_CONTINUE874:.*]]
+; CHECK:       [[PRED_STORE_IF873]]:
+; CHECK-NEXT:    [[TMP1320:%.*]] = add i64 [[INDEX]], 437
+; CHECK-NEXT:    [[TMP1321:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1320]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1321]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE874]]
+; CHECK:       [[PRED_STORE_CONTINUE874]]:
+; CHECK-NEXT:    [[TMP1322:%.*]] = extractelement <64 x i1> [[TMP6]], i32 54
+; CHECK-NEXT:    br i1 [[TMP1322]], label %[[PRED_STORE_IF875:.*]], label %[[PRED_STORE_CONTINUE876:.*]]
+; CHECK:       [[PRED_STORE_IF875]]:
+; CHECK-NEXT:    [[TMP1323:%.*]] = add i64 [[INDEX]], 438
+; CHECK-NEXT:    [[TMP1324:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1323]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1324]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE876]]
+; CHECK:       [[PRED_STORE_CONTINUE876]]:
+; CHECK-NEXT:    [[TMP1325:%.*]] = extractelement <64 x i1> [[TMP6]], i32 55
+; CHECK-NEXT:    br i1 [[TMP1325]], label %[[PRED_STORE_IF877:.*]], label %[[PRED_STORE_CONTINUE878:.*]]
+; CHECK:       [[PRED_STORE_IF877]]:
+; CHECK-NEXT:    [[TMP1326:%.*]] = add i64 [[INDEX]], 439
+; CHECK-NEXT:    [[TMP1327:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1326]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1327]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE878]]
+; CHECK:       [[PRED_STORE_CONTINUE878]]:
+; CHECK-NEXT:    [[TMP1328:%.*]] = extractelement <64 x i1> [[TMP6]], i32 56
+; CHECK-NEXT:    br i1 [[TMP1328]], label %[[PRED_STORE_IF879:.*]], label %[[PRED_STORE_CONTINUE880:.*]]
+; CHECK:       [[PRED_STORE_IF879]]:
+; CHECK-NEXT:    [[TMP1329:%.*]] = add i64 [[INDEX]], 440
+; CHECK-NEXT:    [[TMP1330:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1329]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1330]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE880]]
+; CHECK:       [[PRED_STORE_CONTINUE880]]:
+; CHECK-NEXT:    [[TMP1331:%.*]] = extractelement <64 x i1> [[TMP6]], i32 57
+; CHECK-NEXT:    br i1 [[TMP1331]], label %[[PRED_STORE_IF881:.*]], label %[[PRED_STORE_CONTINUE882:.*]]
+; CHECK:       [[PRED_STORE_IF881]]:
+; CHECK-NEXT:    [[TMP1332:%.*]] = add i64 [[INDEX]], 441
+; CHECK-NEXT:    [[TMP1333:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1332]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1333]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE882]]
+; CHECK:       [[PRED_STORE_CONTINUE882]]:
+; CHECK-NEXT:    [[TMP1334:%.*]] = extractelement <64 x i1> [[TMP6]], i32 58
+; CHECK-NEXT:    br i1 [[TMP1334]], label %[[PRED_STORE_IF883:.*]], label %[[PRED_STORE_CONTINUE884:.*]]
+; CHECK:       [[PRED_STORE_IF883]]:
+; CHECK-NEXT:    [[TMP1335:%.*]] = add i64 [[INDEX]], 442
+; CHECK-NEXT:    [[TMP1336:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1335]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1336]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE884]]
+; CHECK:       [[PRED_STORE_CONTINUE884]]:
+; CHECK-NEXT:    [[TMP1337:%.*]] = extractelement <64 x i1> [[TMP6]], i32 59
+; CHECK-NEXT:    br i1 [[TMP1337]], label %[[PRED_STORE_IF885:.*]], label %[[PRED_STORE_CONTINUE886:.*]]
+; CHECK:       [[PRED_STORE_IF885]]:
+; CHECK-NEXT:    [[TMP1338:%.*]] = add i64 [[INDEX]], 443
+; CHECK-NEXT:    [[TMP1339:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1338]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1339]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE886]]
+; CHECK:       [[PRED_STORE_CONTINUE886]]:
+; CHECK-NEXT:    [[TMP1340:%.*]] = extractelement <64 x i1> [[TMP6]], i32 60
+; CHECK-NEXT:    br i1 [[TMP1340]], label %[[PRED_STORE_IF887:.*]], label %[[PRED_STORE_CONTINUE888:.*]]
+; CHECK:       [[PRED_STORE_IF887]]:
+; CHECK-NEXT:    [[TMP1341:%.*]] = add i64 [[INDEX]], 444
+; CHECK-NEXT:    [[TMP1342:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1341]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1342]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE888]]
+; CHECK:       [[PRED_STORE_CONTINUE888]]:
+; CHECK-NEXT:    [[TMP1343:%.*]] = extractelement <64 x i1> [[TMP6]], i32 61
+; CHECK-NEXT:    br i1 [[TMP1343]], label %[[PRED_STORE_IF889:.*]], label %[[PRED_STORE_CONTINUE890:.*]]
+; CHECK:       [[PRED_STORE_IF889]]:
+; CHECK-NEXT:    [[TMP1344:%.*]] = add i64 [[INDEX]], 445
+; CHECK-NEXT:    [[TMP1345:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1344]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1345]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE890]]
+; CHECK:       [[PRED_STORE_CONTINUE890]]:
+; CHECK-NEXT:    [[TMP1346:%.*]] = extractelement <64 x i1> [[TMP6]], i32 62
+; CHECK-NEXT:    br i1 [[TMP1346]], label %[[PRED_STORE_IF891:.*]], label %[[PRED_STORE_CONTINUE892:.*]]
+; CHECK:       [[PRED_STORE_IF891]]:
+; CHECK-NEXT:    [[TMP1347:%.*]] = add i64 [[INDEX]], 446
+; CHECK-NEXT:    [[TMP1348:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1347]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1348]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE892]]
+; CHECK:       [[PRED_STORE_CONTINUE892]]:
+; CHECK-NEXT:    [[TMP1349:%.*]] = extractelement <64 x i1> [[TMP6]], i32 63
+; CHECK-NEXT:    br i1 [[TMP1349]], label %[[PRED_STORE_IF893:.*]], label %[[PRED_STORE_CONTINUE894:.*]]
+; CHECK:       [[PRED_STORE_IF893]]:
+; CHECK-NEXT:    [[TMP1350:%.*]] = add i64 [[INDEX]], 447
+; CHECK-NEXT:    [[TMP1351:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1350]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1351]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE894]]
+; CHECK:       [[PRED_STORE_CONTINUE894]]:
+; CHECK-NEXT:    [[TMP1352:%.*]] = extractelement <64 x i1> [[TMP7]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1352]], label %[[PRED_STORE_IF895:.*]], label %[[PRED_STORE_CONTINUE896:.*]]
+; CHECK:       [[PRED_STORE_IF895]]:
+; CHECK-NEXT:    [[TMP1353:%.*]] = add i64 [[INDEX]], 448
+; CHECK-NEXT:    [[TMP1354:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1353]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1354]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE896]]
+; CHECK:       [[PRED_STORE_CONTINUE896]]:
+; CHECK-NEXT:    [[TMP1355:%.*]] = extractelement <64 x i1> [[TMP7]], i32 1
+; CHECK-NEXT:    br i1 [[TMP1355]], label %[[PRED_STORE_IF897:.*]], label %[[PRED_STORE_CONTINUE898:.*]]
+; CHECK:       [[PRED_STORE_IF897]]:
+; CHECK-NEXT:    [[TMP1356:%.*]] = add i64 [[INDEX]], 449
+; CHECK-NEXT:    [[TMP1357:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1356]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1357]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE898]]
+; CHECK:       [[PRED_STORE_CONTINUE898]]:
+; CHECK-NEXT:    [[TMP1358:%.*]] = extractelement <64 x i1> [[TMP7]], i32 2
+; CHECK-NEXT:    br i1 [[TMP1358]], label %[[PRED_STORE_IF899:.*]], label %[[PRED_STORE_CONTINUE900:.*]]
+; CHECK:       [[PRED_STORE_IF899]]:
+; CHECK-NEXT:    [[TMP1359:%.*]] = add i64 [[INDEX]], 450
+; CHECK-NEXT:    [[TMP1360:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1359]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1360]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE900]]
+; CHECK:       [[PRED_STORE_CONTINUE900]]:
+; CHECK-NEXT:    [[TMP1361:%.*]] = extractelement <64 x i1> [[TMP7]], i32 3
+; CHECK-NEXT:    br i1 [[TMP1361]], label %[[PRED_STORE_IF901:.*]], label %[[PRED_STORE_CONTINUE902:.*]]
+; CHECK:       [[PRED_STORE_IF901]]:
+; CHECK-NEXT:    [[TMP1362:%.*]] = add i64 [[INDEX]], 451
+; CHECK-NEXT:    [[TMP1363:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1362]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1363]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE902]]
+; CHECK:       [[PRED_STORE_CONTINUE902]]:
+; CHECK-NEXT:    [[TMP1364:%.*]] = extractelement <64 x i1> [[TMP7]], i32 4
+; CHECK-NEXT:    br i1 [[TMP1364]], label %[[PRED_STORE_IF903:.*]], label %[[PRED_STORE_CONTINUE904:.*]]
+; CHECK:       [[PRED_STORE_IF903]]:
+; CHECK-NEXT:    [[TMP1365:%.*]] = add i64 [[INDEX]], 452
+; CHECK-NEXT:    [[TMP1366:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1365]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1366]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE904]]
+; CHECK:       [[PRED_STORE_CONTINUE904]]:
+; CHECK-NEXT:    [[TMP1367:%.*]] = extractelement <64 x i1> [[TMP7]], i32 5
+; CHECK-NEXT:    br i1 [[TMP1367]], label %[[PRED_STORE_IF905:.*]], label %[[PRED_STORE_CONTINUE906:.*]]
+; CHECK:       [[PRED_STORE_IF905]]:
+; CHECK-NEXT:    [[TMP1368:%.*]] = add i64 [[INDEX]], 453
+; CHECK-NEXT:    [[TMP1369:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1368]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1369]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE906]]
+; CHECK:       [[PRED_STORE_CONTINUE906]]:
+; CHECK-NEXT:    [[TMP1370:%.*]] = extractelement <64 x i1> [[TMP7]], i32 6
+; CHECK-NEXT:    br i1 [[TMP1370]], label %[[PRED_STORE_IF907:.*]], label %[[PRED_STORE_CONTINUE908:.*]]
+; CHECK:       [[PRED_STORE_IF907]]:
+; CHECK-NEXT:    [[TMP1371:%.*]] = add i64 [[INDEX]], 454
+; CHECK-NEXT:    [[TMP1372:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1371]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1372]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE908]]
+; CHECK:       [[PRED_STORE_CONTINUE908]]:
+; CHECK-NEXT:    [[TMP1373:%.*]] = extractelement <64 x i1> [[TMP7]], i32 7
+; CHECK-NEXT:    br i1 [[TMP1373]], label %[[PRED_STORE_IF909:.*]], label %[[PRED_STORE_CONTINUE910:.*]]
+; CHECK:       [[PRED_STORE_IF909]]:
+; CHECK-NEXT:    [[TMP1374:%.*]] = add i64 [[INDEX]], 455
+; CHECK-NEXT:    [[TMP1375:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1374]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1375]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE910]]
+; CHECK:       [[PRED_STORE_CONTINUE910]]:
+; CHECK-NEXT:    [[TMP1376:%.*]] = extractelement <64 x i1> [[TMP7]], i32 8
+; CHECK-NEXT:    br i1 [[TMP1376]], label %[[PRED_STORE_IF911:.*]], label %[[PRED_STORE_CONTINUE912:.*]]
+; CHECK:       [[PRED_STORE_IF911]]:
+; CHECK-NEXT:    [[TMP1377:%.*]] = add i64 [[INDEX]], 456
+; CHECK-NEXT:    [[TMP1378:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1377]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1378]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE912]]
+; CHECK:       [[PRED_STORE_CONTINUE912]]:
+; CHECK-NEXT:    [[TMP1379:%.*]] = extractelement <64 x i1> [[TMP7]], i32 9
+; CHECK-NEXT:    br i1 [[TMP1379]], label %[[PRED_STORE_IF913:.*]], label %[[PRED_STORE_CONTINUE914:.*]]
+; CHECK:       [[PRED_STORE_IF913]]:
+; CHECK-NEXT:    [[TMP1380:%.*]] = add i64 [[INDEX]], 457
+; CHECK-NEXT:    [[TMP1381:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1380]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1381]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE914]]
+; CHECK:       [[PRED_STORE_CONTINUE914]]:
+; CHECK-NEXT:    [[TMP1382:%.*]] = extractelement <64 x i1> [[TMP7]], i32 10
+; CHECK-NEXT:    br i1 [[TMP1382]], label %[[PRED_STORE_IF915:.*]], label %[[PRED_STORE_CONTINUE916:.*]]
+; CHECK:       [[PRED_STORE_IF915]]:
+; CHECK-NEXT:    [[TMP1383:%.*]] = add i64 [[INDEX]], 458
+; CHECK-NEXT:    [[TMP1384:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1383]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1384]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE916]]
+; CHECK:       [[PRED_STORE_CONTINUE916]]:
+; CHECK-NEXT:    [[TMP1385:%.*]] = extractelement <64 x i1> [[TMP7]], i32 11
+; CHECK-NEXT:    br i1 [[TMP1385]], label %[[PRED_STORE_IF917:.*]], label %[[PRED_STORE_CONTINUE918:.*]]
+; CHECK:       [[PRED_STORE_IF917]]:
+; CHECK-NEXT:    [[TMP1386:%.*]] = add i64 [[INDEX]], 459
+; CHECK-NEXT:    [[TMP1387:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1386]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1387]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE918]]
+; CHECK:       [[PRED_STORE_CONTINUE918]]:
+; CHECK-NEXT:    [[TMP1388:%.*]] = extractelement <64 x i1> [[TMP7]], i32 12
+; CHECK-NEXT:    br i1 [[TMP1388]], label %[[PRED_STORE_IF919:.*]], label %[[PRED_STORE_CONTINUE920:.*]]
+; CHECK:       [[PRED_STORE_IF919]]:
+; CHECK-NEXT:    [[TMP1389:%.*]] = add i64 [[INDEX]], 460
+; CHECK-NEXT:    [[TMP1390:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1389]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1390]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE920]]
+; CHECK:       [[PRED_STORE_CONTINUE920]]:
+; CHECK-NEXT:    [[TMP1391:%.*]] = extractelement <64 x i1> [[TMP7]], i32 13
+; CHECK-NEXT:    br i1 [[TMP1391]], label %[[PRED_STORE_IF921:.*]], label %[[PRED_STORE_CONTINUE922:.*]]
+; CHECK:       [[PRED_STORE_IF921]]:
+; CHECK-NEXT:    [[TMP1392:%.*]] = add i64 [[INDEX]], 461
+; CHECK-NEXT:    [[TMP1393:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1392]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1393]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE922]]
+; CHECK:       [[PRED_STORE_CONTINUE922]]:
+; CHECK-NEXT:    [[TMP1394:%.*]] = extractelement <64 x i1> [[TMP7]], i32 14
+; CHECK-NEXT:    br i1 [[TMP1394]], label %[[PRED_STORE_IF923:.*]], label %[[PRED_STORE_CONTINUE924:.*]]
+; CHECK:       [[PRED_STORE_IF923]]:
+; CHECK-NEXT:    [[TMP1395:%.*]] = add i64 [[INDEX]], 462
+; CHECK-NEXT:    [[TMP1396:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1395]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1396]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE924]]
+; CHECK:       [[PRED_STORE_CONTINUE924]]:
+; CHECK-NEXT:    [[TMP1397:%.*]] = extractelement <64 x i1> [[TMP7]], i32 15
+; CHECK-NEXT:    br i1 [[TMP1397]], label %[[PRED_STORE_IF925:.*]], label %[[PRED_STORE_CONTINUE926:.*]]
+; CHECK:       [[PRED_STORE_IF925]]:
+; CHECK-NEXT:    [[TMP1398:%.*]] = add i64 [[INDEX]], 463
+; CHECK-NEXT:    [[TMP1399:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1398]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1399]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE926]]
+; CHECK:       [[PRED_STORE_CONTINUE926]]:
+; CHECK-NEXT:    [[TMP1400:%.*]] = extractelement <64 x i1> [[TMP7]], i32 16
+; CHECK-NEXT:    br i1 [[TMP1400]], label %[[PRED_STORE_IF927:.*]], label %[[PRED_STORE_CONTINUE928:.*]]
+; CHECK:       [[PRED_STORE_IF927]]:
+; CHECK-NEXT:    [[TMP1401:%.*]] = add i64 [[INDEX]], 464
+; CHECK-NEXT:    [[TMP1402:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1401]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1402]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE928]]
+; CHECK:       [[PRED_STORE_CONTINUE928]]:
+; CHECK-NEXT:    [[TMP1403:%.*]] = extractelement <64 x i1> [[TMP7]], i32 17
+; CHECK-NEXT:    br i1 [[TMP1403]], label %[[PRED_STORE_IF929:.*]], label %[[PRED_STORE_CONTINUE930:.*]]
+; CHECK:       [[PRED_STORE_IF929]]:
+; CHECK-NEXT:    [[TMP1404:%.*]] = add i64 [[INDEX]], 465
+; CHECK-NEXT:    [[TMP1405:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1404]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1405]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE930]]
+; CHECK:       [[PRED_STORE_CONTINUE930]]:
+; CHECK-NEXT:    [[TMP1406:%.*]] = extractelement <64 x i1> [[TMP7]], i32 18
+; CHECK-NEXT:    br i1 [[TMP1406]], label %[[PRED_STORE_IF931:.*]], label %[[PRED_STORE_CONTINUE932:.*]]
+; CHECK:       [[PRED_STORE_IF931]]:
+; CHECK-NEXT:    [[TMP1407:%.*]] = add i64 [[INDEX]], 466
+; CHECK-NEXT:    [[TMP1408:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1407]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1408]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE932]]
+; CHECK:       [[PRED_STORE_CONTINUE932]]:
+; CHECK-NEXT:    [[TMP1409:%.*]] = extractelement <64 x i1> [[TMP7]], i32 19
+; CHECK-NEXT:    br i1 [[TMP1409]], label %[[PRED_STORE_IF933:.*]], label %[[PRED_STORE_CONTINUE934:.*]]
+; CHECK:       [[PRED_STORE_IF933]]:
+; CHECK-NEXT:    [[TMP1410:%.*]] = add i64 [[INDEX]], 467
+; CHECK-NEXT:    [[TMP1411:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1410]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1411]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE934]]
+; CHECK:       [[PRED_STORE_CONTINUE934]]:
+; CHECK-NEXT:    [[TMP1412:%.*]] = extractelement <64 x i1> [[TMP7]], i32 20
+; CHECK-NEXT:    br i1 [[TMP1412]], label %[[PRED_STORE_IF935:.*]], label %[[PRED_STORE_CONTINUE936:.*]]
+; CHECK:       [[PRED_STORE_IF935]]:
+; CHECK-NEXT:    [[TMP1413:%.*]] = add i64 [[INDEX]], 468
+; CHECK-NEXT:    [[TMP1414:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1413]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1414]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE936]]
+; CHECK:       [[PRED_STORE_CONTINUE936]]:
+; CHECK-NEXT:    [[TMP1415:%.*]] = extractelement <64 x i1> [[TMP7]], i32 21
+; CHECK-NEXT:    br i1 [[TMP1415]], label %[[PRED_STORE_IF937:.*]], label %[[PRED_STORE_CONTINUE938:.*]]
+; CHECK:       [[PRED_STORE_IF937]]:
+; CHECK-NEXT:    [[TMP1416:%.*]] = add i64 [[INDEX]], 469
+; CHECK-NEXT:    [[TMP1417:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1416]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1417]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE938]]
+; CHECK:       [[PRED_STORE_CONTINUE938]]:
+; CHECK-NEXT:    [[TMP1418:%.*]] = extractelement <64 x i1> [[TMP7]], i32 22
+; CHECK-NEXT:    br i1 [[TMP1418]], label %[[PRED_STORE_IF939:.*]], label %[[PRED_STORE_CONTINUE940:.*]]
+; CHECK:       [[PRED_STORE_IF939]]:
+; CHECK-NEXT:    [[TMP1419:%.*]] = add i64 [[INDEX]], 470
+; CHECK-NEXT:    [[TMP1420:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1419]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1420]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE940]]
+; CHECK:       [[PRED_STORE_CONTINUE940]]:
+; CHECK-NEXT:    [[TMP1421:%.*]] = extractelement <64 x i1> [[TMP7]], i32 23
+; CHECK-NEXT:    br i1 [[TMP1421]], label %[[PRED_STORE_IF941:.*]], label %[[PRED_STORE_CONTINUE942:.*]]
+; CHECK:       [[PRED_STORE_IF941]]:
+; CHECK-NEXT:    [[TMP1422:%.*]] = add i64 [[INDEX]], 471
+; CHECK-NEXT:    [[TMP1423:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1422]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1423]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE942]]
+; CHECK:       [[PRED_STORE_CONTINUE942]]:
+; CHECK-NEXT:    [[TMP1424:%.*]] = extractelement <64 x i1> [[TMP7]], i32 24
+; CHECK-NEXT:    br i1 [[TMP1424]], label %[[PRED_STORE_IF943:.*]], label %[[PRED_STORE_CONTINUE944:.*]]
+; CHECK:       [[PRED_STORE_IF943]]:
+; CHECK-NEXT:    [[TMP1425:%.*]] = add i64 [[INDEX]], 472
+; CHECK-NEXT:    [[TMP1426:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1425]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1426]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE944]]
+; CHECK:       [[PRED_STORE_CONTINUE944]]:
+; CHECK-NEXT:    [[TMP1427:%.*]] = extractelement <64 x i1> [[TMP7]], i32 25
+; CHECK-NEXT:    br i1 [[TMP1427]], label %[[PRED_STORE_IF945:.*]], label %[[PRED_STORE_CONTINUE946:.*]]
+; CHECK:       [[PRED_STORE_IF945]]:
+; CHECK-NEXT:    [[TMP1428:%.*]] = add i64 [[INDEX]], 473
+; CHECK-NEXT:    [[TMP1429:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1428]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1429]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE946]]
+; CHECK:       [[PRED_STORE_CONTINUE946]]:
+; CHECK-NEXT:    [[TMP1430:%.*]] = extractelement <64 x i1> [[TMP7]], i32 26
+; CHECK-NEXT:    br i1 [[TMP1430]], label %[[PRED_STORE_IF947:.*]], label %[[PRED_STORE_CONTINUE948:.*]]
+; CHECK:       [[PRED_STORE_IF947]]:
+; CHECK-NEXT:    [[TMP1431:%.*]] = add i64 [[INDEX]], 474
+; CHECK-NEXT:    [[TMP1432:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1431]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1432]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE948]]
+; CHECK:       [[PRED_STORE_CONTINUE948]]:
+; CHECK-NEXT:    [[TMP1433:%.*]] = extractelement <64 x i1> [[TMP7]], i32 27
+; CHECK-NEXT:    br i1 [[TMP1433]], label %[[PRED_STORE_IF949:.*]], label %[[PRED_STORE_CONTINUE950:.*]]
+; CHECK:       [[PRED_STORE_IF949]]:
+; CHECK-NEXT:    [[TMP1434:%.*]] = add i64 [[INDEX]], 475
+; CHECK-NEXT:    [[TMP1435:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1434]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1435]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE950]]
+; CHECK:       [[PRED_STORE_CONTINUE950]]:
+; CHECK-NEXT:    [[TMP1436:%.*]] = extractelement <64 x i1> [[TMP7]], i32 28
+; CHECK-NEXT:    br i1 [[TMP1436]], label %[[PRED_STORE_IF951:.*]], label %[[PRED_STORE_CONTINUE952:.*]]
+; CHECK:       [[PRED_STORE_IF951]]:
+; CHECK-NEXT:    [[TMP1437:%.*]] = add i64 [[INDEX]], 476
+; CHECK-NEXT:    [[TMP1438:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1437]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1438]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE952]]
+; CHECK:       [[PRED_STORE_CONTINUE952]]:
+; CHECK-NEXT:    [[TMP1439:%.*]] = extractelement <64 x i1> [[TMP7]], i32 29
+; CHECK-NEXT:    br i1 [[TMP1439]], label %[[PRED_STORE_IF953:.*]], label %[[PRED_STORE_CONTINUE954:.*]]
+; CHECK:       [[PRED_STORE_IF953]]:
+; CHECK-NEXT:    [[TMP1440:%.*]] = add i64 [[INDEX]], 477
+; CHECK-NEXT:    [[TMP1441:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1440]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1441]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE954]]
+; CHECK:       [[PRED_STORE_CONTINUE954]]:
+; CHECK-NEXT:    [[TMP1442:%.*]] = extractelement <64 x i1> [[TMP7]], i32 30
+; CHECK-NEXT:    br i1 [[TMP1442]], label %[[PRED_STORE_IF955:.*]], label %[[PRED_STORE_CONTINUE956:.*]]
+; CHECK:       [[PRED_STORE_IF955]]:
+; CHECK-NEXT:    [[TMP1443:%.*]] = add i64 [[INDEX]], 478
+; CHECK-NEXT:    [[TMP1444:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1443]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1444]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE956]]
+; CHECK:       [[PRED_STORE_CONTINUE956]]:
+; CHECK-NEXT:    [[TMP1445:%.*]] = extractelement <64 x i1> [[TMP7]], i32 31
+; CHECK-NEXT:    br i1 [[TMP1445]], label %[[PRED_STORE_IF957:.*]], label %[[PRED_STORE_CONTINUE958:.*]]
+; CHECK:       [[PRED_STORE_IF957]]:
+; CHECK-NEXT:    [[TMP1446:%.*]] = add i64 [[INDEX]], 479
+; CHECK-NEXT:    [[TMP1447:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1446]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1447]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE958]]
+; CHECK:       [[PRED_STORE_CONTINUE958]]:
+; CHECK-NEXT:    [[TMP1448:%.*]] = extractelement <64 x i1> [[TMP7]], i32 32
+; CHECK-NEXT:    br i1 [[TMP1448]], label %[[PRED_STORE_IF959:.*]], label %[[PRED_STORE_CONTINUE960:.*]]
+; CHECK:       [[PRED_STORE_IF959]]:
+; CHECK-NEXT:    [[TMP1449:%.*]] = add i64 [[INDEX]], 480
+; CHECK-NEXT:    [[TMP1450:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1449]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1450]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE960]]
+; CHECK:       [[PRED_STORE_CONTINUE960]]:
+; CHECK-NEXT:    [[TMP1451:%.*]] = extractelement <64 x i1> [[TMP7]], i32 33
+; CHECK-NEXT:    br i1 [[TMP1451]], label %[[PRED_STORE_IF961:.*]], label %[[PRED_STORE_CONTINUE962:.*]]
+; CHECK:       [[PRED_STORE_IF961]]:
+; CHECK-NEXT:    [[TMP1452:%.*]] = add i64 [[INDEX]], 481
+; CHECK-NEXT:    [[TMP1453:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1452]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1453]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE962]]
+; CHECK:       [[PRED_STORE_CONTINUE962]]:
+; CHECK-NEXT:    [[TMP1454:%.*]] = extractelement <64 x i1> [[TMP7]], i32 34
+; CHECK-NEXT:    br i1 [[TMP1454]], label %[[PRED_STORE_IF963:.*]], label %[[PRED_STORE_CONTINUE964:.*]]
+; CHECK:       [[PRED_STORE_IF963]]:
+; CHECK-NEXT:    [[TMP1455:%.*]] = add i64 [[INDEX]], 482
+; CHECK-NEXT:    [[TMP1456:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1455]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1456]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE964]]
+; CHECK:       [[PRED_STORE_CONTINUE964]]:
+; CHECK-NEXT:    [[TMP1457:%.*]] = extractelement <64 x i1> [[TMP7]], i32 35
+; CHECK-NEXT:    br i1 [[TMP1457]], label %[[PRED_STORE_IF965:.*]], label %[[PRED_STORE_CONTINUE966:.*]]
+; CHECK:       [[PRED_STORE_IF965]]:
+; CHECK-NEXT:    [[TMP1458:%.*]] = add i64 [[INDEX]], 483
+; CHECK-NEXT:    [[TMP1459:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1458]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1459]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE966]]
+; CHECK:       [[PRED_STORE_CONTINUE966]]:
+; CHECK-NEXT:    [[TMP1460:%.*]] = extractelement <64 x i1> [[TMP7]], i32 36
+; CHECK-NEXT:    br i1 [[TMP1460]], label %[[PRED_STORE_IF967:.*]], label %[[PRED_STORE_CONTINUE968:.*]]
+; CHECK:       [[PRED_STORE_IF967]]:
+; CHECK-NEXT:    [[TMP1461:%.*]] = add i64 [[INDEX]], 484
+; CHECK-NEXT:    [[TMP1462:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1461]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1462]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE968]]
+; CHECK:       [[PRED_STORE_CONTINUE968]]:
+; CHECK-NEXT:    [[TMP1463:%.*]] = extractelement <64 x i1> [[TMP7]], i32 37
+; CHECK-NEXT:    br i1 [[TMP1463]], label %[[PRED_STORE_IF969:.*]], label %[[PRED_STORE_CONTINUE970:.*]]
+; CHECK:       [[PRED_STORE_IF969]]:
+; CHECK-NEXT:    [[TMP1464:%.*]] = add i64 [[INDEX]], 485
+; CHECK-NEXT:    [[TMP1465:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1464]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1465]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE970]]
+; CHECK:       [[PRED_STORE_CONTINUE970]]:
+; CHECK-NEXT:    [[TMP1466:%.*]] = extractelement <64 x i1> [[TMP7]], i32 38
+; CHECK-NEXT:    br i1 [[TMP1466]], label %[[PRED_STORE_IF971:.*]], label %[[PRED_STORE_CONTINUE972:.*]]
+; CHECK:       [[PRED_STORE_IF971]]:
+; CHECK-NEXT:    [[TMP1467:%.*]] = add i64 [[INDEX]], 486
+; CHECK-NEXT:    [[TMP1468:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1467]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1468]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE972]]
+; CHECK:       [[PRED_STORE_CONTINUE972]]:
+; CHECK-NEXT:    [[TMP1469:%.*]] = extractelement <64 x i1> [[TMP7]], i32 39
+; CHECK-NEXT:    br i1 [[TMP1469]], label %[[PRED_STORE_IF973:.*]], label %[[PRED_STORE_CONTINUE974:.*]]
+; CHECK:       [[PRED_STORE_IF973]]:
+; CHECK-NEXT:    [[TMP1470:%.*]] = add i64 [[INDEX]], 487
+; CHECK-NEXT:    [[TMP1471:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1470]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1471]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE974]]
+; CHECK:       [[PRED_STORE_CONTINUE974]]:
+; CHECK-NEXT:    [[TMP1472:%.*]] = extractelement <64 x i1> [[TMP7]], i32 40
+; CHECK-NEXT:    br i1 [[TMP1472]], label %[[PRED_STORE_IF975:.*]], label %[[PRED_STORE_CONTINUE976:.*]]
+; CHECK:       [[PRED_STORE_IF975]]:
+; CHECK-NEXT:    [[TMP1473:%.*]] = add i64 [[INDEX]], 488
+; CHECK-NEXT:    [[TMP1474:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1473]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1474]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE976]]
+; CHECK:       [[PRED_STORE_CONTINUE976]]:
+; CHECK-NEXT:    [[TMP1475:%.*]] = extractelement <64 x i1> [[TMP7]], i32 41
+; CHECK-NEXT:    br i1 [[TMP1475]], label %[[PRED_STORE_IF977:.*]], label %[[PRED_STORE_CONTINUE978:.*]]
+; CHECK:       [[PRED_STORE_IF977]]:
+; CHECK-NEXT:    [[TMP1476:%.*]] = add i64 [[INDEX]], 489
+; CHECK-NEXT:    [[TMP1477:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1476]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1477]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE978]]
+; CHECK:       [[PRED_STORE_CONTINUE978]]:
+; CHECK-NEXT:    [[TMP1478:%.*]] = extractelement <64 x i1> [[TMP7]], i32 42
+; CHECK-NEXT:    br i1 [[TMP1478]], label %[[PRED_STORE_IF979:.*]], label %[[PRED_STORE_CONTINUE980:.*]]
+; CHECK:       [[PRED_STORE_IF979]]:
+; CHECK-NEXT:    [[TMP1479:%.*]] = add i64 [[INDEX]], 490
+; CHECK-NEXT:    [[TMP1480:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1479]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1480]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE980]]
+; CHECK:       [[PRED_STORE_CONTINUE980]]:
+; CHECK-NEXT:    [[TMP1481:%.*]] = extractelement <64 x i1> [[TMP7]], i32 43
+; CHECK-NEXT:    br i1 [[TMP1481]], label %[[PRED_STORE_IF981:.*]], label %[[PRED_STORE_CONTINUE982:.*]]
+; CHECK:       [[PRED_STORE_IF981]]:
+; CHECK-NEXT:    [[TMP1482:%.*]] = add i64 [[INDEX]], 491
+; CHECK-NEXT:    [[TMP1483:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1482]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1483]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE982]]
+; CHECK:       [[PRED_STORE_CONTINUE982]]:
+; CHECK-NEXT:    [[TMP1484:%.*]] = extractelement <64 x i1> [[TMP7]], i32 44
+; CHECK-NEXT:    br i1 [[TMP1484]], label %[[PRED_STORE_IF983:.*]], label %[[PRED_STORE_CONTINUE984:.*]]
+; CHECK:       [[PRED_STORE_IF983]]:
+; CHECK-NEXT:    [[TMP1485:%.*]] = add i64 [[INDEX]], 492
+; CHECK-NEXT:    [[TMP1486:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1485]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1486]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE984]]
+; CHECK:       [[PRED_STORE_CONTINUE984]]:
+; CHECK-NEXT:    [[TMP1487:%.*]] = extractelement <64 x i1> [[TMP7]], i32 45
+; CHECK-NEXT:    br i1 [[TMP1487]], label %[[PRED_STORE_IF985:.*]], label %[[PRED_STORE_CONTINUE986:.*]]
+; CHECK:       [[PRED_STORE_IF985]]:
+; CHECK-NEXT:    [[TMP1488:%.*]] = add i64 [[INDEX]], 493
+; CHECK-NEXT:    [[TMP1489:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1488]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1489]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE986]]
+; CHECK:       [[PRED_STORE_CONTINUE986]]:
+; CHECK-NEXT:    [[TMP1490:%.*]] = extractelement <64 x i1> [[TMP7]], i32 46
+; CHECK-NEXT:    br i1 [[TMP1490]], label %[[PRED_STORE_IF987:.*]], label %[[PRED_STORE_CONTINUE988:.*]]
+; CHECK:       [[PRED_STORE_IF987]]:
+; CHECK-NEXT:    [[TMP1491:%.*]] = add i64 [[INDEX]], 494
+; CHECK-NEXT:    [[TMP1492:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1491]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1492]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE988]]
+; CHECK:       [[PRED_STORE_CONTINUE988]]:
+; CHECK-NEXT:    [[TMP1493:%.*]] = extractelement <64 x i1> [[TMP7]], i32 47
+; CHECK-NEXT:    br i1 [[TMP1493]], label %[[PRED_STORE_IF989:.*]], label %[[PRED_STORE_CONTINUE990:.*]]
+; CHECK:       [[PRED_STORE_IF989]]:
+; CHECK-NEXT:    [[TMP1494:%.*]] = add i64 [[INDEX]], 495
+; CHECK-NEXT:    [[TMP1495:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1494]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1495]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE990]]
+; CHECK:       [[PRED_STORE_CONTINUE990]]:
+; CHECK-NEXT:    [[TMP1496:%.*]] = extractelement <64 x i1> [[TMP7]], i32 48
+; CHECK-NEXT:    br i1 [[TMP1496]], label %[[PRED_STORE_IF991:.*]], label %[[PRED_STORE_CONTINUE992:.*]]
+; CHECK:       [[PRED_STORE_IF991]]:
+; CHECK-NEXT:    [[TMP1497:%.*]] = add i64 [[INDEX]], 496
+; CHECK-NEXT:    [[TMP1498:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1497]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1498]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE992]]
+; CHECK:       [[PRED_STORE_CONTINUE992]]:
+; CHECK-NEXT:    [[TMP1499:%.*]] = extractelement <64 x i1> [[TMP7]], i32 49
+; CHECK-NEXT:    br i1 [[TMP1499]], label %[[PRED_STORE_IF993:.*]], label %[[PRED_STORE_CONTINUE994:.*]]
+; CHECK:       [[PRED_STORE_IF993]]:
+; CHECK-NEXT:    [[TMP1500:%.*]] = add i64 [[INDEX]], 497
+; CHECK-NEXT:    [[TMP1501:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1500]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1501]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE994]]
+; CHECK:       [[PRED_STORE_CONTINUE994]]:
+; CHECK-NEXT:    [[TMP1502:%.*]] = extractelement <64 x i1> [[TMP7]], i32 50
+; CHECK-NEXT:    br i1 [[TMP1502]], label %[[PRED_STORE_IF995:.*]], label %[[PRED_STORE_CONTINUE996:.*]]
+; CHECK:       [[PRED_STORE_IF995]]:
+; CHECK-NEXT:    [[TMP1503:%.*]] = add i64 [[INDEX]], 498
+; CHECK-NEXT:    [[TMP1504:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1503]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1504]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE996]]
+; CHECK:       [[PRED_STORE_CONTINUE996]]:
+; CHECK-NEXT:    [[TMP1505:%.*]] = extractelement <64 x i1> [[TMP7]], i32 51
+; CHECK-NEXT:    br i1 [[TMP1505]], label %[[PRED_STORE_IF997:.*]], label %[[PRED_STORE_CONTINUE998:.*]]
+; CHECK:       [[PRED_STORE_IF997]]:
+; CHECK-NEXT:    [[TMP1506:%.*]] = add i64 [[INDEX]], 499
+; CHECK-NEXT:    [[TMP1507:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1506]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1507]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE998]]
+; CHECK:       [[PRED_STORE_CONTINUE998]]:
+; CHECK-NEXT:    [[TMP1508:%.*]] = extractelement <64 x i1> [[TMP7]], i32 52
+; CHECK-NEXT:    br i1 [[TMP1508]], label %[[PRED_STORE_IF999:.*]], label %[[PRED_STORE_CONTINUE1000:.*]]
+; CHECK:       [[PRED_STORE_IF999]]:
+; CHECK-NEXT:    [[TMP1509:%.*]] = add i64 [[INDEX]], 500
+; CHECK-NEXT:    [[TMP1510:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1509]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1510]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1000]]
+; CHECK:       [[PRED_STORE_CONTINUE1000]]:
+; CHECK-NEXT:    [[TMP1511:%.*]] = extractelement <64 x i1> [[TMP7]], i32 53
+; CHECK-NEXT:    br i1 [[TMP1511]], label %[[PRED_STORE_IF1001:.*]], label %[[PRED_STORE_CONTINUE1002:.*]]
+; CHECK:       [[PRED_STORE_IF1001]]:
+; CHECK-NEXT:    [[TMP1512:%.*]] = add i64 [[INDEX]], 501
+; CHECK-NEXT:    [[TMP1513:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1512]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1513]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1002]]
+; CHECK:       [[PRED_STORE_CONTINUE1002]]:
+; CHECK-NEXT:    [[TMP1514:%.*]] = extractelement <64 x i1> [[TMP7]], i32 54
+; CHECK-NEXT:    br i1 [[TMP1514]], label %[[PRED_STORE_IF1003:.*]], label %[[PRED_STORE_CONTINUE1004:.*]]
+; CHECK:       [[PRED_STORE_IF1003]]:
+; CHECK-NEXT:    [[TMP1515:%.*]] = add i64 [[INDEX]], 502
+; CHECK-NEXT:    [[TMP1516:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1515]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1516]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1004]]
+; CHECK:       [[PRED_STORE_CONTINUE1004]]:
+; CHECK-NEXT:    [[TMP1517:%.*]] = extractelement <64 x i1> [[TMP7]], i32 55
+; CHECK-NEXT:    br i1 [[TMP1517]], label %[[PRED_STORE_IF1005:.*]], label %[[PRED_STORE_CONTINUE1006:.*]]
+; CHECK:       [[PRED_STORE_IF1005]]:
+; CHECK-NEXT:    [[TMP1518:%.*]] = add i64 [[INDEX]], 503
+; CHECK-NEXT:    [[TMP1519:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1518]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1519]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1006]]
+; CHECK:       [[PRED_STORE_CONTINUE1006]]:
+; CHECK-NEXT:    [[TMP1520:%.*]] = extractelement <64 x i1> [[TMP7]], i32 56
+; CHECK-NEXT:    br i1 [[TMP1520]], label %[[PRED_STORE_IF1007:.*]], label %[[PRED_STORE_CONTINUE1008:.*]]
+; CHECK:       [[PRED_STORE_IF1007]]:
+; CHECK-NEXT:    [[TMP1521:%.*]] = add i64 [[INDEX]], 504
+; CHECK-NEXT:    [[TMP1522:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1521]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1522]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1008]]
+; CHECK:       [[PRED_STORE_CONTINUE1008]]:
+; CHECK-NEXT:    [[TMP1523:%.*]] = extractelement <64 x i1> [[TMP7]], i32 57
+; CHECK-NEXT:    br i1 [[TMP1523]], label %[[PRED_STORE_IF1009:.*]], label %[[PRED_STORE_CONTINUE1010:.*]]
+; CHECK:       [[PRED_STORE_IF1009]]:
+; CHECK-NEXT:    [[TMP1524:%.*]] = add i64 [[INDEX]], 505
+; CHECK-NEXT:    [[TMP1525:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1524]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1525]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1010]]
+; CHECK:       [[PRED_STORE_CONTINUE1010]]:
+; CHECK-NEXT:    [[TMP1526:%.*]] = extractelement <64 x i1> [[TMP7]], i32 58
+; CHECK-NEXT:    br i1 [[TMP1526]], label %[[PRED_STORE_IF1011:.*]], label %[[PRED_STORE_CONTINUE1012:.*]]
+; CHECK:       [[PRED_STORE_IF1011]]:
+; CHECK-NEXT:    [[TMP1527:%.*]] = add i64 [[INDEX]], 506
+; CHECK-NEXT:    [[TMP1528:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1527]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1528]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1012]]
+; CHECK:       [[PRED_STORE_CONTINUE1012]]:
+; CHECK-NEXT:    [[TMP1529:%.*]] = extractelement <64 x i1> [[TMP7]], i32 59
+; CHECK-NEXT:    br i1 [[TMP1529]], label %[[PRED_STORE_IF1013:.*]], label %[[PRED_STORE_CONTINUE1014:.*]]
+; CHECK:       [[PRED_STORE_IF1013]]:
+; CHECK-NEXT:    [[TMP1530:%.*]] = add i64 [[INDEX]], 507
+; CHECK-NEXT:    [[TMP1531:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1530]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1531]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1014]]
+; CHECK:       [[PRED_STORE_CONTINUE1014]]:
+; CHECK-NEXT:    [[TMP1532:%.*]] = extractelement <64 x i1> [[TMP7]], i32 60
+; CHECK-NEXT:    br i1 [[TMP1532]], label %[[PRED_STORE_IF1015:.*]], label %[[PRED_STORE_CONTINUE1016:.*]]
+; CHECK:       [[PRED_STORE_IF1015]]:
+; CHECK-NEXT:    [[TMP1533:%.*]] = add i64 [[INDEX]], 508
+; CHECK-NEXT:    [[TMP1534:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1533]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1534]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1016]]
+; CHECK:       [[PRED_STORE_CONTINUE1016]]:
+; CHECK-NEXT:    [[TMP1535:%.*]] = extractelement <64 x i1> [[TMP7]], i32 61
+; CHECK-NEXT:    br i1 [[TMP1535]], label %[[PRED_STORE_IF1017:.*]], label %[[PRED_STORE_CONTINUE1018:.*]]
+; CHECK:       [[PRED_STORE_IF1017]]:
+; CHECK-NEXT:    [[TMP1536:%.*]] = add i64 [[INDEX]], 509
+; CHECK-NEXT:    [[TMP1537:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1536]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1537]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1018]]
+; CHECK:       [[PRED_STORE_CONTINUE1018]]:
+; CHECK-NEXT:    [[TMP1538:%.*]] = extractelement <64 x i1> [[TMP7]], i32 62
+; CHECK-NEXT:    br i1 [[TMP1538]], label %[[PRED_STORE_IF1019:.*]], label %[[PRED_STORE_CONTINUE1020:.*]]
+; CHECK:       [[PRED_STORE_IF1019]]:
+; CHECK-NEXT:    [[TMP1539:%.*]] = add i64 [[INDEX]], 510
+; CHECK-NEXT:    [[TMP1540:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1539]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1540]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1020]]
+; CHECK:       [[PRED_STORE_CONTINUE1020]]:
+; CHECK-NEXT:    [[TMP1541:%.*]] = extractelement <64 x i1> [[TMP7]], i32 63
+; CHECK-NEXT:    br i1 [[TMP1541]], label %[[PRED_STORE_IF1021:.*]], label %[[PRED_STORE_CONTINUE1022]]
+; CHECK:       [[PRED_STORE_IF1021]]:
+; CHECK-NEXT:    [[TMP1542:%.*]] = add i64 [[INDEX]], 511
+; CHECK-NEXT:    [[TMP1543:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1542]]
+; CHECK-NEXT:    store i16 1, ptr [[TMP1543]], align 2
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE1022]]
+; CHECK:       [[PRED_STORE_CONTINUE1022]]:
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <64 x i64> [[STEP_ADD_7]], splat (i64 64)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 512
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 512, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT:    store i16 1, ptr [[P_IV]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 15
+; CHECK-NEXT:    br i1 [[COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.iv = getelementptr inbounds i16, ptr %p, i64 %iv
+  store i16 1, ptr %p.iv, align 2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cond = icmp eq i64 %iv.next, 15
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
index ae20e7d823a583..c03c36d08510f4 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
@@ -1,11 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
+; RUN: opt -p loop-vectorize -force-vector-width=8 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
 
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
-target triple = "aarch64-unknown-linux-gnu"
-
-define dso_local void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_small_tc_i8(
+define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_small_tc_i8(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -113,8 +110,8 @@ end:
   ret void
 }
 
-define dso_local void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_upper_limit_i8(
+define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_upper_limit_i8(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -222,8 +219,8 @@ end:
   ret void
 }
 
-define dso_local void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_lower_limit_i16(
+define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_lower_limit_i16(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -331,8 +328,8 @@ end:
   ret void
 }
 
-define dso_local void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_upper_limit_i16(
+define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_upper_limit_i16(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -440,8 +437,8 @@ end:
   ret void
 }
 
-define dso_local void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_lower_limit_i32(
+define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_lower_limit_i32(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -549,8 +546,8 @@ end:
   ret void
 }
 
-define dso_local void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_upper_limit_i32(
+define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_upper_limit_i32(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -658,8 +655,8 @@ end:
   ret void
 }
 
-define dso_local void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_lower_limit_i64(
+define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_lower_limit_i64(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -767,8 +764,8 @@ end:
   ret void
 }
 
-define dso_local void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
-; CHECK-LABEL: define dso_local void @canonical_upper_limit_i64(
+define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
+; CHECK-LABEL: define void @canonical_upper_limit_i64(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]

>From 6487668405686110c9179a07f8b684b161aed054 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 19 Dec 2024 11:40:30 +0000
Subject: [PATCH 6/6] Address review

- Use m_Specific to match BTC
- Don't assume TC fits in 64 bits
- Update comment
---
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ab1c1c10ab1f73..b2c2ec08d49b05 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -663,8 +663,8 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
-/// Optimize the width of vector induction variables based on \p TC, \p BestVF
-/// and \p BestUF.
+/// Optimize the width of vector induction variables in \p Plan based on a known
+/// constant Trip Count, \p BestVF and \p BestUF.
 static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
                                                      ElementCount BestVF,
                                                      unsigned BestUF) {
@@ -673,10 +673,13 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
   if (!TC || !BestVF.isFixed())
     return false;
 
+  auto TCVal = TC->getValue().tryZExtValue();
+  if (!TCVal)
+    return false;
+
   // Calculate the widest type required for known TC, VF and UF.
-  uint64_t TCVal = TC->getZExtValue();
   uint64_t Width = BestVF.getKnownMinValue() * BestUF;
-  uint64_t MaxVal = alignTo(TCVal, Width) - 1;
+  uint64_t MaxVal = alignTo(*TCVal, Width) - 1;
   unsigned MaxActiveBits = Log2_64_Ceil(MaxVal);
   unsigned NewBitWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);
   LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();
@@ -694,12 +697,11 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
 
     // Currently only handle cases where the single user is a header-mask
     // comparison with the backedge-taken-count.
-    VPValue *Bound;
     using namespace VPlanPatternMatch;
     if (!match(*WideIV->user_begin(),
-               m_Binary<Instruction::ICmp>(m_Specific(WideIV),
-                                           m_VPValue(Bound))) ||
-        Bound != Plan.getOrCreateBackedgeTakenCount())
+               m_Binary<Instruction::ICmp>(
+                   m_Specific(WideIV),
+                   m_Specific(Plan.getOrCreateBackedgeTakenCount()))))
       continue;
 
     // Update IV operands and comparison bound to use new narrower type.
@@ -707,7 +709,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
     WideIV->setStartValue(NewStart);
     auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
     WideIV->setStepValue(NewStep);
-    auto *NewBound = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, TCVal - 1));
+    auto *NewBound = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, *TCVal - 1));
     auto *Cmp = dyn_cast<VPInstruction>(*WideIV->user_begin());
     Cmp->setOperand(1, NewBound);
 



More information about the llvm-commits mailing list