[llvm] [LoopVectorize] Use CodeSize as the cost kind for minsize (PR #124119)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 24 06:44:40 PST 2025


================
@@ -0,0 +1,1036 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; The tests here check for differences in behaviour between the default,
+; optsize, and minsize.
+; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s --check-prefix=DEFAULT
+; RUN: opt -passes=forceattrs,loop-vectorize -force-attribute=optsize -S < %s | FileCheck %s --check-prefix=OPTSIZE
+; RUN: opt -passes=forceattrs,loop-vectorize -force-attribute=minsize -S < %s | FileCheck %s --check-prefix=MINSIZE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+@A = global [1000 x i16] zeroinitializer, align 2
+@B = global [1000 x i32] zeroinitializer, align 4
+@C = global [1000 x i32] zeroinitializer, align 4
+
+; This should always vectorize, as using vector instructions eliminates the loop
+; which is both faster and smaller (a scalar version is emitted, but the branch
+; to it is false and it's later removed).
+define void @always_vectorize(ptr %p, i32 %x) {
+; DEFAULT-LABEL: define void @always_vectorize(
+; DEFAULT-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) {
+; DEFAULT-NEXT:  [[ENTRY:.*]]:
+; DEFAULT-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; DEFAULT:       [[VECTOR_PH]]:
+; DEFAULT-NEXT:    br label %[[VECTOR_BODY:.*]]
+; DEFAULT:       [[VECTOR_BODY]]:
+; DEFAULT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0
+; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; DEFAULT-NEXT:    store <4 x i32> [[TMP3]], ptr [[TMP5]], align 4
+; DEFAULT-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; DEFAULT:       [[MIDDLE_BLOCK]]:
+; DEFAULT-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
+; DEFAULT:       [[SCALAR_PH]]:
+; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; DEFAULT-NEXT:    br label %[[FOR_BODY:.*]]
+; DEFAULT:       [[FOR_BODY]]:
+; DEFAULT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]]
+; DEFAULT-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; DEFAULT-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]]
+; DEFAULT-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; DEFAULT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
+; DEFAULT-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; DEFAULT:       [[FOR_COND_CLEANUP]]:
+; DEFAULT-NEXT:    ret void
+;
+; OPTSIZE-LABEL: define void @always_vectorize(
+; OPTSIZE-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; OPTSIZE-NEXT:  [[ENTRY:.*]]:
+; OPTSIZE-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; OPTSIZE:       [[VECTOR_PH]]:
+; OPTSIZE-NEXT:    br label %[[VECTOR_BODY:.*]]
+; OPTSIZE:       [[VECTOR_BODY]]:
+; OPTSIZE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0
+; OPTSIZE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; OPTSIZE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; OPTSIZE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
+; OPTSIZE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; OPTSIZE-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; OPTSIZE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; OPTSIZE-NEXT:    store <4 x i32> [[TMP3]], ptr [[TMP5]], align 4
+; OPTSIZE-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; OPTSIZE:       [[MIDDLE_BLOCK]]:
+; OPTSIZE-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
+; OPTSIZE:       [[SCALAR_PH]]:
+; OPTSIZE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; OPTSIZE-NEXT:    br label %[[FOR_BODY:.*]]
+; OPTSIZE:       [[FOR_BODY]]:
+; OPTSIZE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; OPTSIZE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]]
+; OPTSIZE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; OPTSIZE-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]]
+; OPTSIZE-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; OPTSIZE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; OPTSIZE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
+; OPTSIZE-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; OPTSIZE:       [[FOR_COND_CLEANUP]]:
+; OPTSIZE-NEXT:    ret void
+;
+; MINSIZE-LABEL: define void @always_vectorize(
+; MINSIZE-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; MINSIZE-NEXT:  [[ENTRY:.*]]:
+; MINSIZE-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; MINSIZE:       [[VECTOR_PH]]:
+; MINSIZE-NEXT:    br label %[[VECTOR_BODY:.*]]
+; MINSIZE:       [[VECTOR_BODY]]:
+; MINSIZE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0
+; MINSIZE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+; MINSIZE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
+; MINSIZE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; MINSIZE-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; MINSIZE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+; MINSIZE-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
+; MINSIZE-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; MINSIZE:       [[MIDDLE_BLOCK]]:
+; MINSIZE-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
+; MINSIZE:       [[SCALAR_PH]]:
+; MINSIZE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; MINSIZE-NEXT:    br label %[[FOR_BODY:.*]]
+; MINSIZE:       [[FOR_BODY]]:
+; MINSIZE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; MINSIZE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]]
+; MINSIZE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; MINSIZE-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]]
+; MINSIZE-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; MINSIZE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; MINSIZE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
+; MINSIZE-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; MINSIZE:       [[FOR_COND_CLEANUP]]:
+; MINSIZE-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index, starts at 0
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %indvars.iv ; address of i32 element %indvars.iv of %p
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %0, %x ; loaded element plus the scalar argument %x
+  store i32 %add, ptr %arrayidx, align 4 ; sum is written back to the same element
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4 ; fixed trip count of 4, so the single <4 x i32> step in the checks above covers every iteration
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+; This should vectorize only without optsize, as it needs a scalar version
+; which increases code size.
+define void @vectorize_without_optsize(ptr %p, i32 %x, i64 %n) {
+; DEFAULT-LABEL: define void @vectorize_without_optsize(
+; DEFAULT-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i64 [[N:%.*]]) {
+; DEFAULT-NEXT:  [[ENTRY:.*]]:
+; DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; DEFAULT:       [[VECTOR_PH]]:
+; DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT:    br label %[[VECTOR_BODY:.*]]
+; DEFAULT:       [[VECTOR_BODY]]:
+; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; DEFAULT-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[TMP0]]
+; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
+; DEFAULT-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; DEFAULT-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; DEFAULT-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4
+; DEFAULT-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP3]], align 4
+; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; DEFAULT-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; DEFAULT:       [[MIDDLE_BLOCK]]:
+; DEFAULT-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; DEFAULT-NEXT:    br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
+; DEFAULT:       [[SCALAR_PH]]:
+; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; DEFAULT-NEXT:    br label %[[FOR_BODY:.*]]
+; DEFAULT:       [[FOR_BODY]]:
+; DEFAULT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]]
+; DEFAULT-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; DEFAULT-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[X]]
+; DEFAULT-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; DEFAULT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; DEFAULT-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT:       [[FOR_COND_CLEANUP]]:
+; DEFAULT-NEXT:    ret void
+;
+; OPTSIZE-LABEL: define void @vectorize_without_optsize(
+; OPTSIZE-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; OPTSIZE-NEXT:  [[ENTRY:.*]]:
+; OPTSIZE-NEXT:    br label %[[FOR_BODY:.*]]
+; OPTSIZE:       [[FOR_BODY]]:
+; OPTSIZE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; OPTSIZE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]]
+; OPTSIZE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; OPTSIZE-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[X]]
+; OPTSIZE-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; OPTSIZE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; OPTSIZE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; OPTSIZE-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY]]
+; OPTSIZE:       [[FOR_COND_CLEANUP]]:
+; OPTSIZE-NEXT:    ret void
+;
+; MINSIZE-LABEL: define void @vectorize_without_optsize(
+; MINSIZE-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; MINSIZE-NEXT:  [[ENTRY:.*]]:
+; MINSIZE-NEXT:    br label %[[FOR_BODY:.*]]
+; MINSIZE:       [[FOR_BODY]]:
+; MINSIZE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; MINSIZE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]]
+; MINSIZE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; MINSIZE-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[X]]
+; MINSIZE-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; MINSIZE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; MINSIZE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; MINSIZE-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY]]
+; MINSIZE:       [[FOR_COND_CLEANUP]]:
+; MINSIZE-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index, starts at 0
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %indvars.iv ; address of i32 element %indvars.iv of %p
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %0, %x ; loaded element plus the scalar argument %x
+  store i32 %add, ptr %arrayidx, align 4 ; sum is written back to the same element
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n ; exit bound is the runtime argument %n, not a constant, so the vectorized form checked above keeps a scalar loop for leftover iterations
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+; This should be vectorized and tail predicated without optsize, as that's
+; faster, but not with optsize, as it's much larger.
+; FIXME: Currently we avoid tail predication only with minsize
+define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) {
+; DEFAULT-LABEL: define void @tail_predicate_without_optsize(
+; DEFAULT-SAME: ptr [[P:%.*]], i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]], i32 [[N:%.*]]) {
+; DEFAULT-NEXT:  [[ENTRY:.*]]:
+; DEFAULT-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; DEFAULT:       [[VECTOR_PH]]:
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i8> poison, i8 [[B]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT3]], <16 x i8> poison, <16 x i32> zeroinitializer
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <16 x i8> poison, i8 [[C]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer
+; DEFAULT-NEXT:    br label %[[VECTOR_BODY:.*]]
+; DEFAULT:       [[VECTOR_BODY]]:
+; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE36:.*]] ]
+; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE36]] ]
+; DEFAULT-NEXT:    [[VEC_IND1:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[PRED_STORE_CONTINUE36]] ]
+; DEFAULT-NEXT:    [[TMP0:%.*]] = icmp ule <16 x i64> [[VEC_IND]], splat (i64 14)
+; DEFAULT-NEXT:    [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]]
+; DEFAULT-NEXT:    [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1)
+; DEFAULT-NEXT:    [[TMP3:%.*]] = mul <16 x i8> [[TMP2]], [[BROADCAST_SPLAT4]]
+; DEFAULT-NEXT:    [[TMP4:%.*]] = add <16 x i8> [[TMP3]], [[TMP1]]
+; DEFAULT-NEXT:    [[TMP5:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 2)
+; DEFAULT-NEXT:    [[TMP6:%.*]] = mul <16 x i8> [[TMP5]], [[BROADCAST_SPLAT6]]
+; DEFAULT-NEXT:    [[TMP7:%.*]] = add <16 x i8> [[TMP4]], [[TMP6]]
+; DEFAULT-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
+; DEFAULT-NEXT:    br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; DEFAULT:       [[PRED_STORE_IF]]:
+; DEFAULT-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
+; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
+; DEFAULT-NEXT:    [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0
+; DEFAULT-NEXT:    store i8 [[TMP11]], ptr [[TMP10]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; DEFAULT:       [[PRED_STORE_CONTINUE]]:
+; DEFAULT-NEXT:    [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
+; DEFAULT-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; DEFAULT:       [[PRED_STORE_IF7]]:
+; DEFAULT-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]]
+; DEFAULT-NEXT:    [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1
+; DEFAULT-NEXT:    store i8 [[TMP15]], ptr [[TMP14]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; DEFAULT:       [[PRED_STORE_CONTINUE8]]:
+; DEFAULT-NEXT:    [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
+; DEFAULT-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; DEFAULT:       [[PRED_STORE_IF9]]:
+; DEFAULT-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 2
+; DEFAULT-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]]
+; DEFAULT-NEXT:    [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2
+; DEFAULT-NEXT:    store i8 [[TMP19]], ptr [[TMP18]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; DEFAULT:       [[PRED_STORE_CONTINUE10]]:
+; DEFAULT-NEXT:    [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
+; DEFAULT-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; DEFAULT:       [[PRED_STORE_IF11]]:
+; DEFAULT-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 3
+; DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]]
+; DEFAULT-NEXT:    [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3
+; DEFAULT-NEXT:    store i8 [[TMP23]], ptr [[TMP22]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; DEFAULT:       [[PRED_STORE_CONTINUE12]]:
+; DEFAULT-NEXT:    [[TMP24:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
+; DEFAULT-NEXT:    br i1 [[TMP24]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; DEFAULT:       [[PRED_STORE_IF13]]:
+; DEFAULT-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 4
+; DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]]
+; DEFAULT-NEXT:    [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4
+; DEFAULT-NEXT:    store i8 [[TMP27]], ptr [[TMP26]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; DEFAULT:       [[PRED_STORE_CONTINUE14]]:
+; DEFAULT-NEXT:    [[TMP28:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
+; DEFAULT-NEXT:    br i1 [[TMP28]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; DEFAULT:       [[PRED_STORE_IF15]]:
+; DEFAULT-NEXT:    [[TMP29:%.*]] = add i64 [[INDEX]], 5
+; DEFAULT-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]]
+; DEFAULT-NEXT:    [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5
+; DEFAULT-NEXT:    store i8 [[TMP31]], ptr [[TMP30]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
+; DEFAULT:       [[PRED_STORE_CONTINUE16]]:
+; DEFAULT-NEXT:    [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
+; DEFAULT-NEXT:    br i1 [[TMP32]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; DEFAULT:       [[PRED_STORE_IF17]]:
+; DEFAULT-NEXT:    [[TMP33:%.*]] = add i64 [[INDEX]], 6
+; DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]]
+; DEFAULT-NEXT:    [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6
+; DEFAULT-NEXT:    store i8 [[TMP35]], ptr [[TMP34]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
+; DEFAULT:       [[PRED_STORE_CONTINUE18]]:
+; DEFAULT-NEXT:    [[TMP36:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
+; DEFAULT-NEXT:    br i1 [[TMP36]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; DEFAULT:       [[PRED_STORE_IF19]]:
+; DEFAULT-NEXT:    [[TMP37:%.*]] = add i64 [[INDEX]], 7
+; DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]]
+; DEFAULT-NEXT:    [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7
+; DEFAULT-NEXT:    store i8 [[TMP39]], ptr [[TMP38]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
+; DEFAULT:       [[PRED_STORE_CONTINUE20]]:
+; DEFAULT-NEXT:    [[TMP40:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
+; DEFAULT-NEXT:    br i1 [[TMP40]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; DEFAULT:       [[PRED_STORE_IF21]]:
+; DEFAULT-NEXT:    [[TMP41:%.*]] = add i64 [[INDEX]], 8
+; DEFAULT-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]]
+; DEFAULT-NEXT:    [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8
+; DEFAULT-NEXT:    store i8 [[TMP43]], ptr [[TMP42]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
+; DEFAULT:       [[PRED_STORE_CONTINUE22]]:
+; DEFAULT-NEXT:    [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
+; DEFAULT-NEXT:    br i1 [[TMP44]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; DEFAULT:       [[PRED_STORE_IF23]]:
+; DEFAULT-NEXT:    [[TMP45:%.*]] = add i64 [[INDEX]], 9
+; DEFAULT-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]]
+; DEFAULT-NEXT:    [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9
+; DEFAULT-NEXT:    store i8 [[TMP47]], ptr [[TMP46]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
+; DEFAULT:       [[PRED_STORE_CONTINUE24]]:
+; DEFAULT-NEXT:    [[TMP48:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
+; DEFAULT-NEXT:    br i1 [[TMP48]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; DEFAULT:       [[PRED_STORE_IF25]]:
+; DEFAULT-NEXT:    [[TMP49:%.*]] = add i64 [[INDEX]], 10
+; DEFAULT-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]]
+; DEFAULT-NEXT:    [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10
+; DEFAULT-NEXT:    store i8 [[TMP51]], ptr [[TMP50]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
+; DEFAULT:       [[PRED_STORE_CONTINUE26]]:
+; DEFAULT-NEXT:    [[TMP52:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
+; DEFAULT-NEXT:    br i1 [[TMP52]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; DEFAULT:       [[PRED_STORE_IF27]]:
+; DEFAULT-NEXT:    [[TMP53:%.*]] = add i64 [[INDEX]], 11
+; DEFAULT-NEXT:    [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]]
+; DEFAULT-NEXT:    [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11
+; DEFAULT-NEXT:    store i8 [[TMP55]], ptr [[TMP54]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
+; DEFAULT:       [[PRED_STORE_CONTINUE28]]:
+; DEFAULT-NEXT:    [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
+; DEFAULT-NEXT:    br i1 [[TMP56]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; DEFAULT:       [[PRED_STORE_IF29]]:
+; DEFAULT-NEXT:    [[TMP57:%.*]] = add i64 [[INDEX]], 12
+; DEFAULT-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]]
+; DEFAULT-NEXT:    [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12
+; DEFAULT-NEXT:    store i8 [[TMP59]], ptr [[TMP58]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
+; DEFAULT:       [[PRED_STORE_CONTINUE30]]:
+; DEFAULT-NEXT:    [[TMP60:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
+; DEFAULT-NEXT:    br i1 [[TMP60]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]]
+; DEFAULT:       [[PRED_STORE_IF31]]:
+; DEFAULT-NEXT:    [[TMP61:%.*]] = add i64 [[INDEX]], 13
+; DEFAULT-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]]
+; DEFAULT-NEXT:    [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13
+; DEFAULT-NEXT:    store i8 [[TMP63]], ptr [[TMP62]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE32]]
+; DEFAULT:       [[PRED_STORE_CONTINUE32]]:
+; DEFAULT-NEXT:    [[TMP64:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
+; DEFAULT-NEXT:    br i1 [[TMP64]], label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]]
+; DEFAULT:       [[PRED_STORE_IF33]]:
+; DEFAULT-NEXT:    [[TMP65:%.*]] = add i64 [[INDEX]], 14
+; DEFAULT-NEXT:    [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]]
+; DEFAULT-NEXT:    [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14
+; DEFAULT-NEXT:    store i8 [[TMP67]], ptr [[TMP66]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE34]]
+; DEFAULT:       [[PRED_STORE_CONTINUE34]]:
+; DEFAULT-NEXT:    [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
+; DEFAULT-NEXT:    br i1 [[TMP68]], label %[[PRED_STORE_IF35:.*]], label %[[PRED_STORE_CONTINUE36]]
+; DEFAULT:       [[PRED_STORE_IF35]]:
+; DEFAULT-NEXT:    [[TMP69:%.*]] = add i64 [[INDEX]], 15
+; DEFAULT-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]]
+; DEFAULT-NEXT:    [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15
+; DEFAULT-NEXT:    store i8 [[TMP71]], ptr [[TMP70]], align 1
+; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE36]]
+; DEFAULT:       [[PRED_STORE_CONTINUE36]]:
+; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
+; DEFAULT-NEXT:    [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16)
+; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; DEFAULT-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; DEFAULT:       [[MIDDLE_BLOCK]]:
+; DEFAULT-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
+; DEFAULT:       [[SCALAR_PH]]:
+; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; DEFAULT-NEXT:    br label %[[FOR_BODY:.*]]
+; DEFAULT:       [[FOR_BODY]]:
+; DEFAULT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; DEFAULT-NEXT:    [[TMP72:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i8
+; DEFAULT-NEXT:    [[MUL:%.*]] = mul i8 [[A]], [[TMP72]]
+; DEFAULT-NEXT:    [[SHR:%.*]] = lshr i8 [[TMP72]], 1
+; DEFAULT-NEXT:    [[MUL5:%.*]] = mul i8 [[SHR]], [[B]]
+; DEFAULT-NEXT:    [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]]
+; DEFAULT-NEXT:    [[SHR7:%.*]] = lshr i8 [[TMP72]], 2
+; DEFAULT-NEXT:    [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]]
+; DEFAULT-NEXT:    [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]]
+; DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
+; DEFAULT-NEXT:    store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1
+; DEFAULT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15
+; DEFAULT-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; DEFAULT:       [[FOR_COND_CLEANUP]]:
+; DEFAULT-NEXT:    ret void
+;
+; OPTSIZE-LABEL: define void @tail_predicate_without_optsize(
+; OPTSIZE-SAME: ptr [[P:%.*]], i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; OPTSIZE-NEXT:  [[ENTRY:.*]]:
+; OPTSIZE-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; OPTSIZE:       [[VECTOR_PH]]:
+; OPTSIZE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0
+; OPTSIZE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+; OPTSIZE-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i8> poison, i8 [[B]], i64 0
+; OPTSIZE-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT3]], <16 x i8> poison, <16 x i32> zeroinitializer
+; OPTSIZE-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <16 x i8> poison, i8 [[C]], i64 0
+; OPTSIZE-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer
+; OPTSIZE-NEXT:    br label %[[VECTOR_BODY:.*]]
+; OPTSIZE:       [[VECTOR_BODY]]:
+; OPTSIZE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE36:.*]] ]
+; OPTSIZE-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE36]] ]
+; OPTSIZE-NEXT:    [[VEC_IND1:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[PRED_STORE_CONTINUE36]] ]
+; OPTSIZE-NEXT:    [[TMP72:%.*]] = icmp ule <16 x i64> [[VEC_IND]], splat (i64 14)
+; OPTSIZE-NEXT:    [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]]
+; OPTSIZE-NEXT:    [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1)
+; OPTSIZE-NEXT:    [[TMP3:%.*]] = mul <16 x i8> [[TMP2]], [[BROADCAST_SPLAT4]]
+; OPTSIZE-NEXT:    [[TMP4:%.*]] = add <16 x i8> [[TMP3]], [[TMP1]]
+; OPTSIZE-NEXT:    [[TMP5:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 2)
+; OPTSIZE-NEXT:    [[TMP6:%.*]] = mul <16 x i8> [[TMP5]], [[BROADCAST_SPLAT6]]
+; OPTSIZE-NEXT:    [[TMP7:%.*]] = add <16 x i8> [[TMP4]], [[TMP6]]
+; OPTSIZE-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP72]], i32 0
+; OPTSIZE-NEXT:    br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; OPTSIZE:       [[PRED_STORE_IF]]:
+; OPTSIZE-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
+; OPTSIZE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
+; OPTSIZE-NEXT:    [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0
+; OPTSIZE-NEXT:    store i8 [[TMP11]], ptr [[TMP10]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE]]:
+; OPTSIZE-NEXT:    [[TMP12:%.*]] = extractelement <16 x i1> [[TMP72]], i32 1
+; OPTSIZE-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; OPTSIZE:       [[PRED_STORE_IF7]]:
+; OPTSIZE-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; OPTSIZE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]]
+; OPTSIZE-NEXT:    [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1
+; OPTSIZE-NEXT:    store i8 [[TMP15]], ptr [[TMP14]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE8]]:
+; OPTSIZE-NEXT:    [[TMP16:%.*]] = extractelement <16 x i1> [[TMP72]], i32 2
+; OPTSIZE-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; OPTSIZE:       [[PRED_STORE_IF9]]:
+; OPTSIZE-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 2
+; OPTSIZE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]]
+; OPTSIZE-NEXT:    [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2
+; OPTSIZE-NEXT:    store i8 [[TMP19]], ptr [[TMP18]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE10]]:
+; OPTSIZE-NEXT:    [[TMP20:%.*]] = extractelement <16 x i1> [[TMP72]], i32 3
+; OPTSIZE-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; OPTSIZE:       [[PRED_STORE_IF11]]:
+; OPTSIZE-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 3
+; OPTSIZE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]]
+; OPTSIZE-NEXT:    [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3
+; OPTSIZE-NEXT:    store i8 [[TMP23]], ptr [[TMP22]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE12]]:
+; OPTSIZE-NEXT:    [[TMP24:%.*]] = extractelement <16 x i1> [[TMP72]], i32 4
+; OPTSIZE-NEXT:    br i1 [[TMP24]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; OPTSIZE:       [[PRED_STORE_IF13]]:
+; OPTSIZE-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 4
+; OPTSIZE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]]
+; OPTSIZE-NEXT:    [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4
+; OPTSIZE-NEXT:    store i8 [[TMP27]], ptr [[TMP26]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE14]]:
+; OPTSIZE-NEXT:    [[TMP28:%.*]] = extractelement <16 x i1> [[TMP72]], i32 5
+; OPTSIZE-NEXT:    br i1 [[TMP28]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; OPTSIZE:       [[PRED_STORE_IF15]]:
+; OPTSIZE-NEXT:    [[TMP29:%.*]] = add i64 [[INDEX]], 5
+; OPTSIZE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]]
+; OPTSIZE-NEXT:    [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5
+; OPTSIZE-NEXT:    store i8 [[TMP31]], ptr [[TMP30]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE16]]:
+; OPTSIZE-NEXT:    [[TMP32:%.*]] = extractelement <16 x i1> [[TMP72]], i32 6
+; OPTSIZE-NEXT:    br i1 [[TMP32]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; OPTSIZE:       [[PRED_STORE_IF17]]:
+; OPTSIZE-NEXT:    [[TMP33:%.*]] = add i64 [[INDEX]], 6
+; OPTSIZE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]]
+; OPTSIZE-NEXT:    [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6
+; OPTSIZE-NEXT:    store i8 [[TMP35]], ptr [[TMP34]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE18]]:
+; OPTSIZE-NEXT:    [[TMP36:%.*]] = extractelement <16 x i1> [[TMP72]], i32 7
+; OPTSIZE-NEXT:    br i1 [[TMP36]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; OPTSIZE:       [[PRED_STORE_IF19]]:
+; OPTSIZE-NEXT:    [[TMP37:%.*]] = add i64 [[INDEX]], 7
+; OPTSIZE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]]
+; OPTSIZE-NEXT:    [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7
+; OPTSIZE-NEXT:    store i8 [[TMP39]], ptr [[TMP38]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE20]]:
+; OPTSIZE-NEXT:    [[TMP40:%.*]] = extractelement <16 x i1> [[TMP72]], i32 8
+; OPTSIZE-NEXT:    br i1 [[TMP40]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; OPTSIZE:       [[PRED_STORE_IF21]]:
+; OPTSIZE-NEXT:    [[TMP41:%.*]] = add i64 [[INDEX]], 8
+; OPTSIZE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]]
+; OPTSIZE-NEXT:    [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8
+; OPTSIZE-NEXT:    store i8 [[TMP43]], ptr [[TMP42]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE22]]:
+; OPTSIZE-NEXT:    [[TMP44:%.*]] = extractelement <16 x i1> [[TMP72]], i32 9
+; OPTSIZE-NEXT:    br i1 [[TMP44]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; OPTSIZE:       [[PRED_STORE_IF23]]:
+; OPTSIZE-NEXT:    [[TMP45:%.*]] = add i64 [[INDEX]], 9
+; OPTSIZE-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]]
+; OPTSIZE-NEXT:    [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9
+; OPTSIZE-NEXT:    store i8 [[TMP47]], ptr [[TMP46]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE24]]:
+; OPTSIZE-NEXT:    [[TMP48:%.*]] = extractelement <16 x i1> [[TMP72]], i32 10
+; OPTSIZE-NEXT:    br i1 [[TMP48]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; OPTSIZE:       [[PRED_STORE_IF25]]:
+; OPTSIZE-NEXT:    [[TMP49:%.*]] = add i64 [[INDEX]], 10
+; OPTSIZE-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]]
+; OPTSIZE-NEXT:    [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10
+; OPTSIZE-NEXT:    store i8 [[TMP51]], ptr [[TMP50]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE26]]:
+; OPTSIZE-NEXT:    [[TMP52:%.*]] = extractelement <16 x i1> [[TMP72]], i32 11
+; OPTSIZE-NEXT:    br i1 [[TMP52]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; OPTSIZE:       [[PRED_STORE_IF27]]:
+; OPTSIZE-NEXT:    [[TMP53:%.*]] = add i64 [[INDEX]], 11
+; OPTSIZE-NEXT:    [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]]
+; OPTSIZE-NEXT:    [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11
+; OPTSIZE-NEXT:    store i8 [[TMP55]], ptr [[TMP54]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE28]]:
+; OPTSIZE-NEXT:    [[TMP56:%.*]] = extractelement <16 x i1> [[TMP72]], i32 12
+; OPTSIZE-NEXT:    br i1 [[TMP56]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; OPTSIZE:       [[PRED_STORE_IF29]]:
+; OPTSIZE-NEXT:    [[TMP57:%.*]] = add i64 [[INDEX]], 12
+; OPTSIZE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]]
+; OPTSIZE-NEXT:    [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12
+; OPTSIZE-NEXT:    store i8 [[TMP59]], ptr [[TMP58]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE30]]:
+; OPTSIZE-NEXT:    [[TMP60:%.*]] = extractelement <16 x i1> [[TMP72]], i32 13
+; OPTSIZE-NEXT:    br i1 [[TMP60]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]]
+; OPTSIZE:       [[PRED_STORE_IF31]]:
+; OPTSIZE-NEXT:    [[TMP61:%.*]] = add i64 [[INDEX]], 13
+; OPTSIZE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]]
+; OPTSIZE-NEXT:    [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13
+; OPTSIZE-NEXT:    store i8 [[TMP63]], ptr [[TMP62]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE32]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE32]]:
+; OPTSIZE-NEXT:    [[TMP64:%.*]] = extractelement <16 x i1> [[TMP72]], i32 14
+; OPTSIZE-NEXT:    br i1 [[TMP64]], label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]]
+; OPTSIZE:       [[PRED_STORE_IF33]]:
+; OPTSIZE-NEXT:    [[TMP65:%.*]] = add i64 [[INDEX]], 14
+; OPTSIZE-NEXT:    [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]]
+; OPTSIZE-NEXT:    [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14
+; OPTSIZE-NEXT:    store i8 [[TMP67]], ptr [[TMP66]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE34]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE34]]:
+; OPTSIZE-NEXT:    [[TMP68:%.*]] = extractelement <16 x i1> [[TMP72]], i32 15
+; OPTSIZE-NEXT:    br i1 [[TMP68]], label %[[PRED_STORE_IF35:.*]], label %[[PRED_STORE_CONTINUE36]]
+; OPTSIZE:       [[PRED_STORE_IF35]]:
+; OPTSIZE-NEXT:    [[TMP69:%.*]] = add i64 [[INDEX]], 15
+; OPTSIZE-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]]
+; OPTSIZE-NEXT:    [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15
+; OPTSIZE-NEXT:    store i8 [[TMP71]], ptr [[TMP70]], align 1
+; OPTSIZE-NEXT:    br label %[[PRED_STORE_CONTINUE36]]
+; OPTSIZE:       [[PRED_STORE_CONTINUE36]]:
+; OPTSIZE-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
+; OPTSIZE-NEXT:    [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16)
+; OPTSIZE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; OPTSIZE-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; OPTSIZE:       [[MIDDLE_BLOCK]]:
+; OPTSIZE-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
+; OPTSIZE:       [[SCALAR_PH]]:
+; OPTSIZE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; OPTSIZE-NEXT:    br label %[[FOR_BODY:.*]]
+; OPTSIZE:       [[FOR_BODY]]:
+; OPTSIZE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; OPTSIZE-NEXT:    [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i8
+; OPTSIZE-NEXT:    [[MUL:%.*]] = mul i8 [[A]], [[TMP0]]
+; OPTSIZE-NEXT:    [[SHR:%.*]] = lshr i8 [[TMP0]], 1
+; OPTSIZE-NEXT:    [[MUL5:%.*]] = mul i8 [[SHR]], [[B]]
+; OPTSIZE-NEXT:    [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]]
+; OPTSIZE-NEXT:    [[SHR7:%.*]] = lshr i8 [[TMP0]], 2
+; OPTSIZE-NEXT:    [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]]
+; OPTSIZE-NEXT:    [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]]
+; OPTSIZE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
+; OPTSIZE-NEXT:    store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1
+; OPTSIZE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; OPTSIZE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15
+; OPTSIZE-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; OPTSIZE:       [[FOR_COND_CLEANUP]]:
+; OPTSIZE-NEXT:    ret void
+;
+; MINSIZE-LABEL: define void @tail_predicate_without_optsize(
+; MINSIZE-SAME: ptr [[P:%.*]], i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; MINSIZE-NEXT:  [[ENTRY:.*]]:
+; MINSIZE-NEXT:    br label %[[FOR_BODY:.*]]
+; MINSIZE:       [[FOR_BODY]]:
+; MINSIZE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; MINSIZE-NEXT:    [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i8
+; MINSIZE-NEXT:    [[MUL:%.*]] = mul i8 [[A]], [[TMP0]]
+; MINSIZE-NEXT:    [[SHR:%.*]] = lshr i8 [[TMP0]], 1
+; MINSIZE-NEXT:    [[MUL5:%.*]] = mul i8 [[SHR]], [[B]]
+; MINSIZE-NEXT:    [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]]
+; MINSIZE-NEXT:    [[SHR7:%.*]] = lshr i8 [[TMP0]], 2
+; MINSIZE-NEXT:    [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]]
+; MINSIZE-NEXT:    [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]]
+; MINSIZE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
+; MINSIZE-NEXT:    store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1
+; MINSIZE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; MINSIZE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15
+; MINSIZE-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY]]
+; MINSIZE:       [[FOR_COND_CLEANUP]]:
+; MINSIZE-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
----------------
fhahn wrote:

The redundant `indvars.` prefix can be dropped for more compact, cleaner names:
```suggestion
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
```

https://github.com/llvm/llvm-project/pull/124119


More information about the llvm-commits mailing list