[llvm] [VPlan] Introduce VPlan-level constant folder (PR #125365)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Thu May 8 03:48:52 PDT 2025


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/125365

>From 35ea506fac806569aca060ec472bc0cc79719beb Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 27 Feb 2025 10:11:53 +0000
Subject: [PATCH 1/7] [LV] Pre-commit tests for const-folder

---
 .../LoopVectorize/constantfolder.ll           | 437 ++++++++++++++++++
 1 file changed, 437 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/constantfolder.ll

diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
new file mode 100644
index 0000000000000..737f2c22da1d8
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
@@ -0,0 +1,437 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
+
+define void @const_fold_ptradd(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_ptradd(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    store i16 0, ptr [[TMP0]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[CONST_0]]
+; CHECK-NEXT:    store i16 0, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:                                         ; preds = %loop.latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 true, label %loop.latch, label %else
+
+else:
+  br label %loop.latch
+
+loop.latch:
+  %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ]
+  %gep = getelementptr i16, ptr %dst, i64 %const.0
+  store i16 0, ptr %gep, align 2
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_inbounds_ptradd(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[PREDPHI]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    store i16 0, ptr [[TMP0]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[CONST_0]]
+; CHECK-NEXT:    store i16 0, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:                                         ; preds = %loop.latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 true, label %loop.latch, label %else
+
+else:
+  br label %loop.latch
+
+loop.latch:
+  %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ]
+  %gep = getelementptr inbounds i16, ptr %dst, i64 %const.0
+  store i16 0, ptr %gep, align 2
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+define void @const_fold_select(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_select(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[PREDPHI]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
+; CHECK-NEXT:    store i64 [[TMP3]], ptr [[DST]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[OR:%.*]] = or i64 [[D]], [[CONST_1]]
+; CHECK-NEXT:    store i64 [[OR]], ptr [[DST]], align 8
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:                                         ; preds = %loop.latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 true, label %loop.latch, label %else
+
+else:
+  br label %loop.latch
+
+loop.latch:
+  %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ]
+  %or = or i64 %d, %const.1
+  store i64 %or, ptr %dst, align 8
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+define void @const_fold_add_sub_mul_ashr_lshr(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_add_sub_mul_ashr_lshr(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i64> splat (i64 2), [[PREDPHI]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i64> [[TMP0]], [[PREDPHI]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i64> [[TMP1]], [[PREDPHI]]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i64> [[TMP2]], splat (i64 3)
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], [[PREDPHI]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
+; CHECK-NEXT:    store i64 [[TMP5]], ptr [[DST]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 2, [[CONST_1]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[ADD]], [[CONST_1]]
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i64 [[SUB]], [[CONST_1]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[ASHR]], 3
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr i64 [[MUL]], [[CONST_1]]
+; CHECK-NEXT:    store i64 [[LSHR]], ptr [[DST]], align 8
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:                                         ; preds = %loop.latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 true, label %loop.latch, label %else
+
+else:
+  br label %loop.latch
+
+loop.latch:
+  %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ]
+  %add = add i64 2, %const.1
+  %sub = sub i64 %add, %const.1
+  %ashr = ashr i64 %sub, %const.1
+  %mul = mul i64 %ashr, 3
+  %lshr = lshr i64 %mul, %const.1
+  store i64 %lshr, ptr %dst, align 8
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+define void @const_fold_and_or_xor(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_and_or_xor(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i64> splat (i64 2), [[PREDPHI]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i64> [[TMP0]], [[PREDPHI]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[TMP1]], [[PREDPHI]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
+; CHECK-NEXT:    store i64 [[TMP3]], ptr [[DST]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[OR:%.*]] = or i64 2, [[CONST_1]]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[OR]], [[CONST_1]]
+; CHECK-NEXT:    [[XOR:%.*]] = and i64 [[AND]], [[CONST_1]]
+; CHECK-NEXT:    store i64 [[XOR]], ptr [[DST]], align 8
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:                                         ; preds = %loop.latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 true, label %loop.latch, label %else
+
+else:
+  br label %loop.latch
+
+loop.latch:
+  %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ]
+  %or = or i64 2, %const.1
+  %and = and i64 %or, %const.1
+  %xor = and i64 %and, %const.1
+  store i64 %xor, ptr %dst, align 8
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+define void @const_fold_cmp_zext(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_cmp_zext(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i64> splat (i64 2), [[PREDPHI]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i8>
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
+; CHECK-NEXT:    store i8 [[TMP2]], ptr [[DST]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = icmp ugt i64 2, [[CONST_1]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[VAL]] to i8
+; CHECK-NEXT:    store i8 [[ZEXT]], ptr [[DST]], align 1
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:                                         ; preds = %loop.latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 true, label %loop.latch, label %else
+
+else:
+  br label %loop.latch
+
+loop.latch:
+  %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ]
+  %val = icmp ugt i64 2, %const.1
+  %zext = zext i1 %val to i8
+  store i8 %zext, ptr %dst, align 1
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+define void @const_fold_trunc(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_trunc(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> zeroinitializer, <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i64> [[PREDPHI]] to <4 x i16>
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
+; CHECK-NEXT:    store i16 [[TMP1]], ptr [[DST]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 [[CONST_0]] to i16
+; CHECK-NEXT:    store i16 [[TRUNC]], ptr [[DST]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:                                         ; preds = %loop.latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 true, label %loop.latch, label %else
+
+else:
+  br label %loop.latch
+
+loop.latch:
+  %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ]
+  %trunc = trunc i64 %const.0 to i16
+  store i16 %trunc, ptr %dst, align 2
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop.header, label %exit
+
+exit:
+  ret void
+}

>From 76defe2e2f1994d6f1b5e3e1739c3624ca2c8fb9 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sat, 1 Feb 2025 20:32:51 +0000
Subject: [PATCH 2/7] VPlan: implement VPlan-level constant-folding

Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan,
and use it in VPBuilder to constant-fold when all the underlying IR
values passed into the API are constants.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |   5 +-
 llvm/lib/Transforms/Vectorize/VPlan.cpp       |   2 +-
 .../Vectorize/VPlanConstantFolder.h           | 157 +++++++++++++++++
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  30 +++-
 .../Transforms/Vectorize/VPlanTransforms.h    |   5 +-
 .../RISCV/blocks-with-dead-instructions.ll    |  48 +-----
 .../truncate-to-minimal-bitwidth-cost.ll      |   6 +-
 ...licate-recipe-with-only-first-lane-used.ll | 162 +++++++++++++++---
 .../LoopVectorize/constantfolder.ll           |  44 +----
 .../debugloc-optimize-vfuf-term.ll            |  18 +-
 .../LoopVectorize/first-order-recurrence.ll   |   7 +-
 .../interleave-and-scalarize-only.ll          |   3 +-
 12 files changed, 350 insertions(+), 137 deletions(-)
 create mode 100644 llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2dca2b322ee82..fb5c75be62898 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7789,7 +7789,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
                            OrigLoop->getHeader()->getContext());
   VPlanTransforms::materializeBroadcasts(BestVPlan);
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
-  VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
+  VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType(),
+                                   OrigLoop->getHeader()->getDataLayout());
   VPlanTransforms::narrowInterleaveGroups(
       BestVPlan, BestVF,
       TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -9080,7 +9081,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       if (!HasScalarVF)
         VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
                                  *Plan, CM.getMinimalBitwidths());
-      VPlanTransforms::optimize(*Plan);
+      VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
       // TODO: try to put it close to addActiveLaneMask().
       // Discard the plan if it is not EVL-compatible
       if (CM.foldTailWithEVL() && !HasScalarVF &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 90331a2ce2ce3..c676aeed1fc4b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1550,7 +1550,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
     if (auto Plan = tryToBuildVPlan(SubRange)) {
-      VPlanTransforms::optimize(*Plan);
+      VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
       // Update the name of the latch of the top-level vector loop region region
       // after optimizations which includes block folding.
       Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
new file mode 100644
index 0000000000000..6dc8616cf4d85
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
@@ -0,0 +1,157 @@
+//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
+
+#include "VPlan.h"
+#include "VPlanValue.h"
+#include "llvm/Analysis/TargetFolder.h"
+
+namespace llvm {
+class VPConstantFolder {
+  TargetFolder Folder;
+  VPTypeAnalysis TypeInfo;
+
+  Constant *getIRConstant(VPValue *V) const {
+    if (!V->isLiveIn())
+      return nullptr;
+    return dyn_cast_if_present<Constant>(V->getLiveInIRValue());
+  }
+
+  Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
+                   VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC)
+      return Folder.FoldBinOp(Opcode, LC, RC);
+    return nullptr;
+  }
+
+  Value *foldNot(VPValue *Op) const {
+    auto *C = getIRConstant(Op);
+    if (C)
+      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, C,
+                              Constant::getAllOnesValue(C->getType()));
+    return nullptr;
+  }
+
+  Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC)
+      return Folder.FoldSelect(LC, RC,
+                               ConstantInt::getNullValue(RC->getType()));
+    return nullptr;
+  }
+
+  Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
+    auto *CC = getIRConstant(Cond);
+    auto *TV = getIRConstant(TrueVal);
+    auto *FV = getIRConstant(FalseVal);
+    if (CC && TV && FV)
+      return Folder.FoldSelect(CC, TV, FV);
+    return nullptr;
+  }
+
+  Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC)
+      return Folder.FoldCmp(Pred, LC, RC);
+    return nullptr;
+  }
+
+  Value *foldGEP(Type *Ty, VPValue *Base, ArrayRef<VPValue *> Offsets,
+                 GEPNoWrapFlags NW) const {
+    auto *BC = getIRConstant(Base);
+    if (!BC)
+      return nullptr;
+    SmallVector<Value *> IdxList;
+    for (auto *O : Offsets) {
+      if (auto *OffsetV = getIRConstant(O))
+        IdxList.emplace_back(OffsetV);
+      else
+        return nullptr;
+    }
+    return Folder.FoldGEP(Ty, BC, IdxList, NW);
+  }
+
+  Value *foldInsertElement(VPValue *Vec, VPValue *NewElt, VPValue *Idx) const {
+    auto *VC = getIRConstant(Vec);
+    auto *EC = getIRConstant(NewElt);
+    auto *IC = getIRConstant(Idx);
+    if (VC && EC && IC)
+      Folder.FoldInsertElement(VC, EC, IC);
+    return nullptr;
+  }
+
+  Value *foldExtractElement(VPValue *Vec, VPValue *Idx) const {
+    auto *VC = getIRConstant(Vec);
+    auto *IC = getIRConstant(Idx);
+    if (VC && IC)
+      Folder.FoldExtractElement(VC, IC);
+    return nullptr;
+  }
+
+  Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
+                  Type *DestTy) const {
+    auto *C = getIRConstant(Op);
+    if (C)
+      return Folder.FoldCast(Opcode, C, DestTy);
+    return nullptr;
+  }
+
+public:
+  VPConstantFolder(const DataLayout &DL, const VPTypeAnalysis &TypeInfo)
+      : Folder(DL), TypeInfo(TypeInfo) {}
+
+  Value *tryToConstantFold(VPRecipeBase &R, unsigned Opcode,
+                           ArrayRef<VPValue *> Ops) {
+    switch (Opcode) {
+    case Instruction::BinaryOps::Add:
+    case Instruction::BinaryOps::Sub:
+    case Instruction::BinaryOps::Mul:
+    case Instruction::BinaryOps::AShr:
+    case Instruction::BinaryOps::LShr:
+    case Instruction::BinaryOps::And:
+    case Instruction::BinaryOps::Or:
+    case Instruction::BinaryOps::Xor:
+      return foldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
+                       Ops[1]);
+    case VPInstruction::LogicalAnd:
+      return foldLogicalAnd(Ops[0], Ops[1]);
+    case VPInstruction::Not:
+      return foldNot(Ops[0]);
+    case Instruction::Select:
+      return foldSelect(Ops[0], Ops[1], Ops[2]);
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+      return foldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
+                     Ops[1]);
+    case Instruction::GetElementPtr:
+    case VPInstruction::PtrAdd:
+      return foldGEP(TypeInfo.inferScalarType(R.getVPSingleValue()), Ops[0],
+                     Ops.drop_front(),
+                     cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
+    case Instruction::InsertElement:
+      return foldInsertElement(Ops[0], Ops[1], Ops[2]);
+    case Instruction::ExtractElement:
+      return foldExtractElement(Ops[0], Ops[1]);
+    case Instruction::CastOps::SExt:
+    case Instruction::CastOps::ZExt:
+    case Instruction::CastOps::Trunc:
+      return foldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
+                      TypeInfo.inferScalarType(R.getVPSingleValue()));
+    }
+    return nullptr;
+  }
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7093d378d8c3e..d866eb6d73a26 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -16,6 +16,7 @@
 #include "VPlan.h"
 #include "VPlanAnalysis.h"
 #include "VPlanCFG.h"
+#include "VPlanConstantFolder.h"
 #include "VPlanDominatorTree.h"
 #include "VPlanHelpers.h"
 #include "VPlanPatternMatch.h"
@@ -938,9 +939,21 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
 }
 
 /// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
+                           const DataLayout &DL) {
   using namespace llvm::VPlanPatternMatch;
 
+  // Constant folding.
+  VPConstantFolder Folder(DL, TypeInfo);
+  TypeSwitch<VPRecipeBase *, void>(&R)
+      .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe>(
+          [&](auto *I) {
+            VPlan *Plan = R.getParent()->getPlan();
+            ArrayRef<VPValue *> Ops(I->op_begin(), I->op_end());
+            if (Value *V = Folder.tryToConstantFold(R, I->getOpcode(), Ops))
+              R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
+          });
+
   // VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
   // part 0 can be replaced by their start value, if only the first lane is
   // demanded.
@@ -1073,13 +1086,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   }
 }
 
-void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
+void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
+                                      const DataLayout &DL) {
   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
       Plan.getEntry());
   VPTypeAnalysis TypeInfo(&CanonicalIVTy);
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      simplifyRecipe(R, TypeInfo);
+      simplifyRecipe(R, TypeInfo, DL);
     }
   }
 }
@@ -1330,7 +1344,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
 
     VPBlockUtils::connectBlocks(Preheader, Header);
     VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
-    VPlanTransforms::simplifyRecipes(Plan, *CanIVTy);
+    VPlanTransforms::simplifyRecipes(Plan, *CanIVTy,
+                                     PSE.getSE()->getDataLayout());
   } else {
     // The vector region contains header phis for which we cannot remove the
     // loop region yet.
@@ -1799,17 +1814,16 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
     VPBB->back().eraseFromParent();
   }
 }
-
-void VPlanTransforms::optimize(VPlan &Plan) {
+void VPlanTransforms::optimize(VPlan &Plan, const DataLayout &DL) {
   runPass(removeRedundantCanonicalIVs, Plan);
   runPass(removeRedundantInductionCasts, Plan);
 
-  runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
+  runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
   runPass(simplifyBlends, Plan);
   runPass(removeDeadRecipes, Plan);
   runPass(legalizeAndOptimizeInductions, Plan);
   runPass(removeRedundantExpandSCEVRecipes, Plan);
-  runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
+  runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
   runPass(removeBranchOnCondTrue, Plan);
   runPass(removeDeadRecipes, Plan);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 9e8b518a0c7eb..6bd69ac804f31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -109,7 +109,7 @@ struct VPlanTransforms {
   /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
   /// optimizations, dead recipe removal, replicate region optimizations and
   /// block merging.
-  static void optimize(VPlan &Plan);
+  static void optimize(VPlan &Plan, const DataLayout &DL);
 
   /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
   /// region block and remove the mask operand. Optimize the created regions by
@@ -190,7 +190,8 @@ struct VPlanTransforms {
 
   /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
   /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
-  static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
+  static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
+                              const DataLayout &DL);
 
   /// If there's a single exit block, optimize its phi recipes that use exiting
   /// IV values by feeding them precomputed end values instead, possibly taken
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
index a1a5deb93f6c7..ea8df1669288d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -47,14 +47,10 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP23:%.*]] = mul i32 [[TMP22]], 8
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP23]], 1
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -141,14 +137,10 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP20:%.*]] = mul i32 [[TMP19]], 4
-; CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[TMP20]], 1
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -235,14 +227,10 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP20:%.*]] = mul i32 [[TMP19]], 4
-; CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[TMP20]], 1
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -339,14 +327,10 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP23:%.*]] = mul i32 [[TMP22]], 8
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP23]], 1
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -445,14 +429,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP20:%.*]] = mul i32 [[TMP19]], 4
-; CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[TMP20]], 1
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -559,14 +539,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP23:%.*]] = mul i32 [[TMP22]], 8
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP23]], 1
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -663,15 +639,11 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 {
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], 8
-; CHECK-NEXT:    [[TMP16:%.*]] = sub i32 [[TMP15]], 1
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> zeroinitializer, i32 [[TMP16]]
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -751,15 +723,11 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 {
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], 8
-; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP16]], 1
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> zeroinitializer, i32 [[TMP17]]
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
index 61706fe4d55c0..f83b5782d5ad3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
@@ -165,10 +165,6 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP10]], 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[X]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = trunc <vscale x 4 x i8> [[BROADCAST_SPLAT]] to <vscale x 4 x i1>
-; CHECK-NEXT:    [[TMP8:%.*]] = or <vscale x 4 x i1> splat (i1 true), [[TMP7]]
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
@@ -181,7 +177,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <vscale x 4 x i32> [[BROADCAST_SPLAT4]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <vscale x 4 x i32> [[VEC_IV]], i32 0
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[TMP15]], i32 9)
-; CHECK-NEXT:    [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> zeroinitializer
 ; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> [[BROADCAST_SPLAT2]], i32 1, <vscale x 4 x i1> [[TMP11]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
index e6c9ce3381f73..85bcd97d50c7d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
@@ -12,60 +12,166 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X]], 10
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 0
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    store i16 0, ptr [[TMP11]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[DIV_I:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[RETVAL_0_I:%.*]] = phi i64 [ [[DIV_I]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[RETVAL_0_I]]
+; CHECK-NEXT:    store i16 0, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c = icmp eq i32 %x, 10
+  br label %loop.header
+
+loop.header:                                         ; preds = %loop.latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 true, label %loop.latch, label %else
+
+else:
+  %div.i = udiv i64 99, %d
+  br label %loop.latch
+
+loop.latch:
+  %retval.0.i = phi i64 [ %div.i, %else ], [ 0, %loop.header ]
+  %gep = getelementptr i16, ptr %dst, i64 %retval.0.i
+  store i16 0, ptr %gep, align 2
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 101
+  br i1 %cmp, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+; Variant where ptradd cannot be const-folded.
+define void @replicate_udiv_with_only_first_lane_used2(i32 %x, ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @replicate_udiv_with_only_first_lane_used2(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X]], 10
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE14:.*]] ]
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
 ; CHECK:       [[PRED_UDIV_IF]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE]]
 ; CHECK:       [[PRED_UDIV_CONTINUE]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP0]], %[[PRED_UDIV_IF]] ]
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_UDIV_IF]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
 ; CHECK:       [[PRED_UDIV_IF1]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP6:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP6]], i32 1
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE2]]
 ; CHECK:       [[PRED_UDIV_CONTINUE2]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i64> [ [[TMP4]], %[[PRED_UDIV_CONTINUE]] ], [ [[TMP7]], %[[PRED_UDIV_IF1]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
 ; CHECK:       [[PRED_UDIV_IF3]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP10:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i64> [[TMP8]], i64 [[TMP10]], i32 2
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE4]]
 ; CHECK:       [[PRED_UDIV_CONTINUE4]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
+; CHECK-NEXT:    [[TMP49:%.*]] = phi <4 x i64> [ [[TMP8]], %[[PRED_UDIV_CONTINUE2]] ], [ [[TMP34]], %[[PRED_UDIV_IF3]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
 ; CHECK:       [[PRED_UDIV_IF5]]:
-; CHECK-NEXT:    [[TMP4:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP14:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i64> [[TMP49]], i64 [[TMP14]], i32 3
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE6]]
 ; CHECK:       [[PRED_UDIV_CONTINUE6]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi <4 x i64> [ [[TMP49]], %[[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], %[[PRED_UDIV_IF5]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP17]], label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
 ; CHECK:       [[PRED_UDIV_IF7]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP18:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP18]], i32 0
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE8]]
 ; CHECK:       [[PRED_UDIV_CONTINUE8]]:
-; CHECK-NEXT:    [[TMP6:%.*]] = phi i64 [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP5]], %[[PRED_UDIV_IF7]] ]
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
+; CHECK-NEXT:    [[TMP20:%.*]] = phi <4 x i64> [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP19]], %[[PRED_UDIV_IF7]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP21]], label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
 ; CHECK:       [[PRED_UDIV_IF9]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP22:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP22]], i32 1
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE10]]
 ; CHECK:       [[PRED_UDIV_CONTINUE10]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
+; CHECK-NEXT:    [[TMP24:%.*]] = phi <4 x i64> [ [[TMP20]], %[[PRED_UDIV_CONTINUE8]] ], [ [[TMP23]], %[[PRED_UDIV_IF9]] ]
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
 ; CHECK:       [[PRED_UDIV_IF11]]:
-; CHECK-NEXT:    [[TMP8:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP26:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i64> [[TMP24]], i64 [[TMP26]], i32 2
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE12]]
 ; CHECK:       [[PRED_UDIV_CONTINUE12]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
+; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i64> [ [[TMP24]], %[[PRED_UDIV_CONTINUE10]] ], [ [[TMP27]], %[[PRED_UDIV_IF11]] ]
+; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP29]], label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
 ; CHECK:       [[PRED_UDIV_IF13]]:
-; CHECK-NEXT:    [[TMP9:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP30:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <4 x i64> [[TMP28]], i64 [[TMP30]], i32 3
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE14]]
 ; CHECK:       [[PRED_UDIV_CONTINUE14]]:
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[TMP1]]
-; CHECK-NEXT:    [[PREDPHI15:%.*]] = select i1 true, i64 0, i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI15]]
-; CHECK-NEXT:    store i16 0, ptr [[TMP10]], align 2
+; CHECK-NEXT:    [[TMP32:%.*]] = phi <4 x i64> [ [[TMP28]], %[[PRED_UDIV_CONTINUE12]] ], [ [[TMP31]], %[[PRED_UDIV_IF13]] ]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP16]]
+; CHECK-NEXT:    [[PREDPHI15:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP32]]
+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP33]]
+; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 1
+; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP35]]
+; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 2
+; CHECK-NEXT:    [[TMP38:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP37]]
+; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 3
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP39]]
+; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 0
+; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP41]]
+; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 1
+; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP43]]
+; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 2
+; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP45]]
+; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 3
+; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP47]]
 ; CHECK-NEXT:    store i16 0, ptr [[TMP11]], align 2
+; CHECK-NEXT:    store i16 0, ptr [[TMP36]], align 2
+; CHECK-NEXT:    store i16 0, ptr [[TMP38]], align 2
+; CHECK-NEXT:    store i16 0, ptr [[TMP40]], align 2
+; CHECK-NEXT:    store i16 0, ptr [[TMP42]], align 2
+; CHECK-NEXT:    store i16 0, ptr [[TMP44]], align 2
+; CHECK-NEXT:    store i16 0, ptr [[TMP46]], align 2
+; CHECK-NEXT:    store i16 0, ptr [[TMP48]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
-; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -73,7 +179,7 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT:    br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK-NEXT:    br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
 ; CHECK:       [[ELSE]]:
 ; CHECK-NEXT:    [[DIV_I:%.*]] = udiv i64 99, [[D]]
 ; CHECK-NEXT:    br label %[[LOOP_LATCH]]
@@ -83,7 +189,7 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
 ; CHECK-NEXT:    store i16 0, ptr [[GEP]], align 2
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
-; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -93,7 +199,7 @@ entry:
 
 loop.header:                                         ; preds = %loop.latch, %entry
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
-  br i1 true, label %loop.latch, label %else
+  br i1 %c, label %loop.latch, label %else
 
 else:
   %div.i = udiv i64 99, %d
@@ -115,4 +221,6 @@ exit:
 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
 ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
 ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
index 737f2c22da1d8..d6edec27cbd54 100644
--- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
@@ -7,8 +7,7 @@ define void @const_fold_ptradd(ptr %dst, i64 %d) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 0
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -64,8 +63,7 @@ define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[PREDPHI]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 0
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -123,8 +121,7 @@ define void @const_fold_select(ptr %dst, i64 %d) {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[PREDPHI]]
+; CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], splat (i64 1)
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -181,19 +178,10 @@ define void @const_fold_add_sub_mul_ashr_lshr(ptr %dst, i64 %d) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i64> splat (i64 2), [[PREDPHI]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i64> [[TMP0]], [[PREDPHI]]
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i64> [[TMP1]], [[PREDPHI]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i64> [[TMP2]], splat (i64 3)
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], [[PREDPHI]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
-; CHECK-NEXT:    store i64 [[TMP5]], ptr [[DST]], align 8
+; CHECK-NEXT:    store i64 1, ptr [[DST]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -253,17 +241,10 @@ define void @const_fold_and_or_xor(ptr %dst, i64 %d) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i64> splat (i64 2), [[PREDPHI]]
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i64> [[TMP0]], [[PREDPHI]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[TMP1]], [[PREDPHI]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
-; CHECK-NEXT:    store i64 [[TMP3]], ptr [[DST]], align 8
+; CHECK-NEXT:    store i64 1, ptr [[DST]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -319,16 +300,10 @@ define void @const_fold_cmp_zext(ptr %dst, i64 %d) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i64> splat (i64 2), [[PREDPHI]]
-; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i8>
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
-; CHECK-NEXT:    store i8 [[TMP2]], ptr [[DST]], align 1
+; CHECK-NEXT:    store i8 1, ptr [[DST]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -382,15 +357,10 @@ define void @const_fold_trunc(ptr %dst, i64 %d) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> zeroinitializer, <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i64> [[PREDPHI]] to <4 x i16>
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
-; CHECK-NEXT:    store i16 [[TMP1]], ptr [[DST]], align 2
+; CHECK-NEXT:    store i16 0, ptr [[DST]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll
index 0f34f6243f155..913a13410f527 100644
--- a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll
+++ b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll
@@ -12,19 +12,19 @@ define i32 @foo(ptr %p) {
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    store i8 0, ptr [[P]], align 1, !dbg [[DBG3:![0-9]+]]
-; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]], !dbg [[DBG7:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG7:![0-9]+]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG9:![0-9]+]]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG8:![0-9]+]]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], !dbg [[DBG9]]
-; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 0 to i8, !dbg [[DBG7]]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], !dbg [[DBG8]]
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 0 to i8, !dbg [[DBG9:![0-9]+]]
 ; CHECK-NEXT:    store i8 [[CONV]], ptr [[P]], align 1, !dbg [[DBG3]]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1, !dbg [[DBG10:![0-9]+]]
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1, !dbg [[DBG11:![0-9]+]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG8]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG7]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret i32 0
 ;
@@ -65,9 +65,9 @@ exit:                              ; preds = %loop
 ; CHECK: [[META4]] = distinct !DISubprogram(name: "foo", scope: [[META1]], file: [[META1]], line: 11, type: [[META5:![0-9]+]], spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META6:![0-9]+]])
 ; CHECK: [[META5]] = distinct !DISubroutineType(types: [[META6]])
 ; CHECK: [[META6]] = !{}
-; CHECK: [[DBG7]] = !DILocation(line: 5, scope: [[META4]])
-; CHECK: [[DBG8]] = !DILocation(line: 9, scope: [[META4]])
-; CHECK: [[DBG9]] = !DILocation(line: 4, scope: [[META4]])
+; CHECK: [[DBG7]] = !DILocation(line: 9, scope: [[META4]])
+; CHECK: [[DBG8]] = !DILocation(line: 4, scope: [[META4]])
+; CHECK: [[DBG9]] = !DILocation(line: 5, scope: [[META4]])
 ; CHECK: [[DBG10]] = !DILocation(line: 7, scope: [[META4]])
 ; CHECK: [[DBG11]] = !DILocation(line: 8, scope: [[META4]])
 ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META13:![0-9]+]], [[META14:![0-9]+]]}
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index c622315a47178..594b8ff70feb8 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -1254,11 +1254,10 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-VF-NEXT:  entry:
 ; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-VF:       vector.ph:
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 0, 1
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP0]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 1, [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -1266,7 +1265,7 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-VF-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-VF:       scalar.body:
 ; UNROLL-NO-VF-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
@@ -1276,7 +1275,7 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000
 ; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
-; UNROLL-NO-VF-NEXT:    [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    ret i64 [[VAR2_LCSSA]]
 ;
 ; SINK-AFTER-LABEL: @constant_folded_previous_value(
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index bf40d269e805e..fe59b022e1dd7 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -202,7 +202,6 @@ exit:
 ; DBG-NEXT: Successor(s): vector.ph
 ; DBG-EMPTY:
 ; DBG-NEXT: vector.ph:
-; DBG-NEXT:   EMIT vp<[[CAST:%.+]]> = trunc ir<1> to i32
 ; DBG-NEXT: Successor(s): vector loop
 ; DBG-EMPTY:
 ; DBG-NEXT: <x1> vector loop: {
@@ -210,7 +209,7 @@ exit:
 ; DBG-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; DBG-NEXT:     FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
 ; DBG-NEXT:     EMIT vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
-; DBG-NEXT:     vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>, vp<[[VF]]
+; DBG-NEXT:     vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, ir<1>, vp<[[VF]]
 ; DBG-NEXT:     EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
 ; DBG-NEXT:     CLONE store vp<[[SPLICE]]>, ir<%dst>
 ; DBG-NEXT:     EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>

>From 00f13007b96f21feb337692b6c94b59b7586c727 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 25 Apr 2025 10:32:52 +0100
Subject: [PATCH 3/7] [VPlan] Early-return after const-folding

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d866eb6d73a26..063071a4fd4ce 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -945,14 +945,18 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
 
   // Constant folding.
   VPConstantFolder Folder(DL, TypeInfo);
-  TypeSwitch<VPRecipeBase *, void>(&R)
-      .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe>(
-          [&](auto *I) {
+  if (TypeSwitch<VPRecipeBase *, bool>(&R)
+          .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+                VPReplicateRecipe>([&](auto *I) {
             VPlan *Plan = R.getParent()->getPlan();
             ArrayRef<VPValue *> Ops(I->op_begin(), I->op_end());
-            if (Value *V = Folder.tryToConstantFold(R, I->getOpcode(), Ops))
+            Value *V = Folder.tryToConstantFold(R, I->getOpcode(), Ops);
+            if (V)
               R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
-          });
+            return V;
+          })
+          .Default([](auto *) { return false; }))
+    return;
 
   // VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
   // part 0 can be replaced by their start value, if only the first lane is

>From 96a957fe8e708c0cb5201f45e28c8c6d011d84a5 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sat, 3 May 2025 14:09:23 +0100
Subject: [PATCH 4/7] [VPlan] Simplify and inline VPlanConstantFolder

---
 .../Vectorize/VPlanConstantFolder.h           | 157 ------------------
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  64 ++++++-
 2 files changed, 61 insertions(+), 160 deletions(-)
 delete mode 100644 llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h

diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
deleted file mode 100644
index 6dc8616cf4d85..0000000000000
--- a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
+++ /dev/null
@@ -1,157 +0,0 @@
-//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
-#define LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
-
-#include "VPlan.h"
-#include "VPlanValue.h"
-#include "llvm/Analysis/TargetFolder.h"
-
-namespace llvm {
-class VPConstantFolder {
-  TargetFolder Folder;
-  VPTypeAnalysis TypeInfo;
-
-  Constant *getIRConstant(VPValue *V) const {
-    if (!V->isLiveIn())
-      return nullptr;
-    return dyn_cast_if_present<Constant>(V->getLiveInIRValue());
-  }
-
-  Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
-                   VPValue *RHS) const {
-    auto *LC = getIRConstant(LHS);
-    auto *RC = getIRConstant(RHS);
-    if (LC && RC)
-      return Folder.FoldBinOp(Opcode, LC, RC);
-    return nullptr;
-  }
-
-  Value *foldNot(VPValue *Op) const {
-    auto *C = getIRConstant(Op);
-    if (C)
-      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, C,
-                              Constant::getAllOnesValue(C->getType()));
-    return nullptr;
-  }
-
-  Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
-    auto *LC = getIRConstant(LHS);
-    auto *RC = getIRConstant(RHS);
-    if (LC && RC)
-      return Folder.FoldSelect(LC, RC,
-                               ConstantInt::getNullValue(RC->getType()));
-    return nullptr;
-  }
-
-  Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
-    auto *CC = getIRConstant(Cond);
-    auto *TV = getIRConstant(TrueVal);
-    auto *FV = getIRConstant(FalseVal);
-    if (CC && TV && FV)
-      return Folder.FoldSelect(CC, TV, FV);
-    return nullptr;
-  }
-
-  Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
-    auto *LC = getIRConstant(LHS);
-    auto *RC = getIRConstant(RHS);
-    if (LC && RC)
-      return Folder.FoldCmp(Pred, LC, RC);
-    return nullptr;
-  }
-
-  Value *foldGEP(Type *Ty, VPValue *Base, ArrayRef<VPValue *> Offsets,
-                 GEPNoWrapFlags NW) const {
-    auto *BC = getIRConstant(Base);
-    if (!BC)
-      return nullptr;
-    SmallVector<Value *> IdxList;
-    for (auto *O : Offsets) {
-      if (auto *OffsetV = getIRConstant(O))
-        IdxList.emplace_back(OffsetV);
-      else
-        return nullptr;
-    }
-    return Folder.FoldGEP(Ty, BC, IdxList, NW);
-  }
-
-  Value *foldInsertElement(VPValue *Vec, VPValue *NewElt, VPValue *Idx) const {
-    auto *VC = getIRConstant(Vec);
-    auto *EC = getIRConstant(NewElt);
-    auto *IC = getIRConstant(Idx);
-    if (VC && EC && IC)
-      Folder.FoldInsertElement(VC, EC, IC);
-    return nullptr;
-  }
-
-  Value *foldExtractElement(VPValue *Vec, VPValue *Idx) const {
-    auto *VC = getIRConstant(Vec);
-    auto *IC = getIRConstant(Idx);
-    if (VC && IC)
-      Folder.FoldExtractElement(VC, IC);
-    return nullptr;
-  }
-
-  Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
-                  Type *DestTy) const {
-    auto *C = getIRConstant(Op);
-    if (C)
-      return Folder.FoldCast(Opcode, C, DestTy);
-    return nullptr;
-  }
-
-public:
-  VPConstantFolder(const DataLayout &DL, const VPTypeAnalysis &TypeInfo)
-      : Folder(DL), TypeInfo(TypeInfo) {}
-
-  Value *tryToConstantFold(VPRecipeBase &R, unsigned Opcode,
-                           ArrayRef<VPValue *> Ops) {
-    switch (Opcode) {
-    case Instruction::BinaryOps::Add:
-    case Instruction::BinaryOps::Sub:
-    case Instruction::BinaryOps::Mul:
-    case Instruction::BinaryOps::AShr:
-    case Instruction::BinaryOps::LShr:
-    case Instruction::BinaryOps::And:
-    case Instruction::BinaryOps::Or:
-    case Instruction::BinaryOps::Xor:
-      return foldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
-                       Ops[1]);
-    case VPInstruction::LogicalAnd:
-      return foldLogicalAnd(Ops[0], Ops[1]);
-    case VPInstruction::Not:
-      return foldNot(Ops[0]);
-    case Instruction::Select:
-      return foldSelect(Ops[0], Ops[1], Ops[2]);
-    case Instruction::ICmp:
-    case Instruction::FCmp:
-      return foldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
-                     Ops[1]);
-    case Instruction::GetElementPtr:
-    case VPInstruction::PtrAdd:
-      return foldGEP(TypeInfo.inferScalarType(R.getVPSingleValue()), Ops[0],
-                     Ops.drop_front(),
-                     cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
-    case Instruction::InsertElement:
-      return foldInsertElement(Ops[0], Ops[1], Ops[2]);
-    case Instruction::ExtractElement:
-      return foldExtractElement(Ops[0], Ops[1]);
-    case Instruction::CastOps::SExt:
-    case Instruction::CastOps::ZExt:
-    case Instruction::CastOps::Trunc:
-      return foldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
-                      TypeInfo.inferScalarType(R.getVPSingleValue()));
-    }
-    return nullptr;
-  }
-};
-} // namespace llvm
-
-#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 063071a4fd4ce..1604628938f83 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -16,7 +16,6 @@
 #include "VPlan.h"
 #include "VPlanAnalysis.h"
 #include "VPlanCFG.h"
-#include "VPlanConstantFolder.h"
 #include "VPlanDominatorTree.h"
 #include "VPlanHelpers.h"
 #include "VPlanPatternMatch.h"
@@ -29,6 +28,7 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
@@ -938,6 +938,64 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
+class VPConstantFolder {
+  TargetFolder Folder;
+  VPTypeAnalysis TypeInfo;
+
+public:
+  VPConstantFolder(const DataLayout &DL, const VPTypeAnalysis &TypeInfo)
+      : Folder(DL), TypeInfo(TypeInfo) {}
+
+  Value *tryToConstantFold(VPRecipeBase &R, unsigned Opcode,
+                           ArrayRef<VPValue *> Operands) {
+    SmallVector<Value *, 4> Ops;
+    for (VPValue *Op : Operands) {
+      if (!Op->isLiveIn() || !Op->getLiveInIRValue())
+        return nullptr;
+      Ops.emplace_back(Op->getLiveInIRValue());
+    }
+    switch (Opcode) {
+    case Instruction::BinaryOps::Add:
+    case Instruction::BinaryOps::Sub:
+    case Instruction::BinaryOps::Mul:
+    case Instruction::BinaryOps::AShr:
+    case Instruction::BinaryOps::LShr:
+    case Instruction::BinaryOps::And:
+    case Instruction::BinaryOps::Or:
+    case Instruction::BinaryOps::Xor:
+      return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
+                              Ops[0], Ops[1]);
+    case VPInstruction::LogicalAnd:
+      return Folder.FoldSelect(Ops[0], Ops[1],
+                               ConstantInt::getNullValue(Ops[1]->getType()));
+    case VPInstruction::Not:
+      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
+                              Constant::getAllOnesValue(Ops[0]->getType()));
+    case Instruction::Select:
+      return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+      return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
+                            Ops[1]);
+    case Instruction::GetElementPtr:
+    case VPInstruction::PtrAdd:
+      return Folder.FoldGEP(TypeInfo.inferScalarType(R.getVPSingleValue()),
+                            Ops[0], drop_begin(Ops),
+                            cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
+    case Instruction::InsertElement:
+      return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
+    case Instruction::ExtractElement:
+      return Folder.FoldExtractElement(Ops[0], Ops[1]);
+    case Instruction::CastOps::SExt:
+    case Instruction::CastOps::ZExt:
+    case Instruction::CastOps::Trunc:
+      return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
+                             TypeInfo.inferScalarType(R.getVPSingleValue()));
+    }
+    return nullptr;
+  }
+};
+
 /// Try to simplify recipe \p R.
 static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
                            const DataLayout &DL) {
@@ -949,8 +1007,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
           .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
                 VPReplicateRecipe>([&](auto *I) {
             VPlan *Plan = R.getParent()->getPlan();
-            ArrayRef<VPValue *> Ops(I->op_begin(), I->op_end());
-            Value *V = Folder.tryToConstantFold(R, I->getOpcode(), Ops);
+            Value *V =
+                Folder.tryToConstantFold(R, I->getOpcode(), I->operands());
             if (V)
               R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
             return V;

>From 5e6ba18efb7becc4211022cb65eb8975234ba600 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 6 May 2025 10:10:01 +0100
Subject: [PATCH 5/7] [LV] Add test for const-folder crash

---
 .../constantfolder-infer-correct-gepty.ll     | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll

diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll
new file mode 100644
index 0000000000000..31ca00b6ecc5e
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll
@@ -0,0 +1,31 @@
+; REQUIRES: asserts
+; RUN: not --crash opt -passes=loop-vectorize -force-vector-width=8 -disable-output %s
+
+ at postscale = external constant [64 x float]
+
+define void @test(ptr %data) {
+entry:
+  br label %loop
+
+loop:                                             ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %or.iv.1 = or disjoint i64 %iv, 1
+  %gep.postscale = getelementptr [64 x float], ptr @postscale, i64 0, i64 %or.iv.1
+  %load.postscale = load float, ptr %gep.postscale, align 4, !tbaa !0
+  %lrint = tail call i64 @llvm.lrint.i64.f32(float %load.postscale)
+  %lrint.trunc = trunc i64 %lrint to i16
+  store i16 %lrint.trunc, ptr %data, align 2, !tbaa !4
+  %iv.next = add i64 %iv, 1
+  %exit.cond = icmp eq i64 %iv.next, 8
+  br i1 %exit.cond, label %end, label %loop
+
+end:                                              ; preds = %loop
+  ret void
+}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"short", !2, i64 0}

>From aedd1bb10a7bd56cd4cd1c2cf524c33c2690754d Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 6 May 2025 10:59:44 +0100
Subject: [PATCH 6/7] [VPlan] Address review, fix crash

---
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |  11 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 110 ++++++++----------
 ...eave-to-widen-memory-remove-loop-region.ll |   3 +-
 .../X86/drop-poison-generating-flags.ll       |   7 +-
 .../constantfolder-infer-correct-gepty.ll     |  36 +++++-
 5 files changed, 96 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6027572f1e844..682e16df948fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2728,8 +2728,15 @@ InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
 
 void VPPredInstPHIRecipe::execute(VPTransformState &State) {
   assert(State.Lane && "Predicated instruction PHI works per instance.");
-  Instruction *ScalarPredInst =
-      cast<Instruction>(State.get(getOperand(0), *State.Lane));
+  Value *ScalarPred = State.get(getOperand(0), *State.Lane);
+  Instruction *ScalarPredInst = dyn_cast<Instruction>(ScalarPred);
+  if (!ScalarPredInst) {
+    if (State.hasScalarValue(this, *State.Lane))
+      State.reset(this, ScalarPred, *State.Lane);
+    else
+      State.set(this, ScalarPred, *State.Lane);
+    return;
+  }
   BasicBlock *PredicatedBB = ScalarPredInst->getParent();
   BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
   assert(PredicatingBB && "Predicated block has no single predecessor.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1604628938f83..e4155c7136bd3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -938,63 +938,56 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
-class VPConstantFolder {
-  TargetFolder Folder;
-  VPTypeAnalysis TypeInfo;
-
-public:
-  VPConstantFolder(const DataLayout &DL, const VPTypeAnalysis &TypeInfo)
-      : Folder(DL), TypeInfo(TypeInfo) {}
-
-  Value *tryToConstantFold(VPRecipeBase &R, unsigned Opcode,
-                           ArrayRef<VPValue *> Operands) {
-    SmallVector<Value *, 4> Ops;
-    for (VPValue *Op : Operands) {
-      if (!Op->isLiveIn() || !Op->getLiveInIRValue())
-        return nullptr;
-      Ops.emplace_back(Op->getLiveInIRValue());
-    }
-    switch (Opcode) {
-    case Instruction::BinaryOps::Add:
-    case Instruction::BinaryOps::Sub:
-    case Instruction::BinaryOps::Mul:
-    case Instruction::BinaryOps::AShr:
-    case Instruction::BinaryOps::LShr:
-    case Instruction::BinaryOps::And:
-    case Instruction::BinaryOps::Or:
-    case Instruction::BinaryOps::Xor:
-      return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
-                              Ops[0], Ops[1]);
-    case VPInstruction::LogicalAnd:
-      return Folder.FoldSelect(Ops[0], Ops[1],
-                               ConstantInt::getNullValue(Ops[1]->getType()));
-    case VPInstruction::Not:
-      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
-                              Constant::getAllOnesValue(Ops[0]->getType()));
-    case Instruction::Select:
-      return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
-    case Instruction::ICmp:
-    case Instruction::FCmp:
-      return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
+/// Try to fold \p R using TargetFolder to a constant. Will succeed for a
+/// handled \p Opcode if all \p Operands are constant.
+static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode,
+                                ArrayRef<VPValue *> Operands,
+                                const DataLayout &DL,
+                                VPTypeAnalysis &TypeInfo) {
+  SmallVector<Value *, 4> Ops;
+  for (VPValue *Op : Operands) {
+    if (!Op->isLiveIn() || !Op->getLiveInIRValue())
+      return nullptr;
+    Ops.push_back(Op->getLiveInIRValue());
+  }
+
+  TargetFolder Folder(DL);
+  if (Instruction::isBinaryOp(Opcode))
+    return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
                             Ops[1]);
-    case Instruction::GetElementPtr:
-    case VPInstruction::PtrAdd:
-      return Folder.FoldGEP(TypeInfo.inferScalarType(R.getVPSingleValue()),
-                            Ops[0], drop_begin(Ops),
-                            cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
-    case Instruction::InsertElement:
-      return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
-    case Instruction::ExtractElement:
-      return Folder.FoldExtractElement(Ops[0], Ops[1]);
-    case Instruction::CastOps::SExt:
-    case Instruction::CastOps::ZExt:
-    case Instruction::CastOps::Trunc:
-      return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
-                             TypeInfo.inferScalarType(R.getVPSingleValue()));
-    }
-    return nullptr;
+  if (Instruction::isCast(Opcode))
+    return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
+                           TypeInfo.inferScalarType(R.getVPSingleValue()));
+  switch (Opcode) {
+  case VPInstruction::LogicalAnd:
+    return Folder.FoldSelect(Ops[0], Ops[1],
+                             ConstantInt::getNullValue(Ops[1]->getType()));
+  case VPInstruction::Not:
+    return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
+                            Constant::getAllOnesValue(Ops[0]->getType()));
+  case Instruction::Select:
+    return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
+  case Instruction::ICmp:
+  case Instruction::FCmp:
+    return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
+                          Ops[1]);
+  case Instruction::GetElementPtr: {
+    auto &RFlags = cast<VPRecipeWithIRFlags>(R);
+    auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
+    return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
+                          RFlags.getGEPNoWrapFlags());
+  }
+  case VPInstruction::PtrAdd:
+    return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
+                          Ops[1],
+                          cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
+  case Instruction::InsertElement:
+    return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
+  case Instruction::ExtractElement:
+    return Folder.FoldExtractElement(Ops[0], Ops[1]);
   }
-};
+  return nullptr;
+}
 
 /// Try to simplify recipe \p R.
 static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
@@ -1002,15 +995,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
   using namespace llvm::VPlanPatternMatch;
 
   // Constant folding.
-  VPConstantFolder Folder(DL, TypeInfo);
   if (TypeSwitch<VPRecipeBase *, bool>(&R)
           .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
                 VPReplicateRecipe>([&](auto *I) {
             VPlan *Plan = R.getParent()->getPlan();
-            Value *V =
-                Folder.tryToConstantFold(R, I->getOpcode(), I->operands());
+            Value *V = tryToConstantFold(*I, I->getOpcode(), I->operands(), DL,
+                                         TypeInfo);
             if (V)
-              R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
+              I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
             return V;
           })
           .Default([](auto *) { return false; }))
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
index 25e1e2d4cab1e..220e450cf30d5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
@@ -13,8 +13,7 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) {
 ; VF2:       [[VECTOR_PH]]:
 ; VF2-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; VF2:       [[VECTOR_BODY]]:
-; VF2-NEXT:    [[TMP0:%.*]] = shl nsw i64 0, 1
-; VF2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
+; VF2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 0
 ; VF2-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
 ; VF2-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
 ; VF2-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index 8beb467572826..1e0f4ed9c73f5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -563,29 +563,24 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]]
 ; CHECK:       pred.srem.if:
-; CHECK-NEXT:    [[TMP4:%.*]] = srem i64 3, 0
 ; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE]]
 ; CHECK:       pred.srem.continue:
-; CHECK-NEXT:    [[TMP5:%.*]] = phi i64 [ poison, %vector.body ], [ [[TMP4]], [[PRED_SREM_IF]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_SREM_IF1:%.*]], label [[PRED_SREM_CONTINUE2:%.*]]
 ; CHECK:       pred.srem.if1:
-; CHECK-NEXT:    [[TMP7:%.*]] = srem i64 3, 0
 ; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE2]]
 ; CHECK:       pred.srem.continue2:
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4:%.*]]
 ; CHECK:       pred.srem.if3:
-; CHECK-NEXT:    [[TMP10:%.*]] = srem i64 3, 0
 ; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE4]]
 ; CHECK:       pred.srem.continue4:
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_SREM_IF5:%.*]], label [[PRED_SREM_CONTINUE6]]
 ; CHECK:       pred.srem.if5:
-; CHECK-NEXT:    [[TMP13:%.*]] = srem i64 3, 0
 ; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE6]]
 ; CHECK:       pred.srem.continue6:
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP5]], -3
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 poison, -3
 ; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll
index 31ca00b6ecc5e..e6eaa0edbf3bd 100644
--- a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll
@@ -1,9 +1,41 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -passes=loop-vectorize -force-vector-width=8 -disable-output %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=8 -S %s | FileCheck %s
 
 @postscale = external constant [64 x float]
 
 define void @test(ptr %data) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[DATA:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x float>, ptr getelementptr inbounds nuw (i8, ptr @postscale, i64 4), align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i64> [[TMP2]] to <8 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
+; CHECK-NEXT:    store i16 [[TMP4]], ptr [[DATA]], align 2, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[OR_IV_1:%.*]] = or disjoint i64 [[IV]], 1
+; CHECK-NEXT:    [[GEP_POSTSCALE:%.*]] = getelementptr [64 x float], ptr @postscale, i64 0, i64 [[OR_IV_1]]
+; CHECK-NEXT:    [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[LRINT:%.*]] = tail call i64 @llvm.lrint.i64.f32(float [[LOAD_POSTSCALE]])
+; CHECK-NEXT:    [[LRINT_TRUNC:%.*]] = trunc i64 [[LRINT]] to i16
+; CHECK-NEXT:    store i16 [[LRINT_TRUNC]], ptr [[DATA]], align 2, !tbaa [[TBAA4]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 8
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 

>From 621306327a69808da5aec4e37290c0fbdcdbec60 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 8 May 2025 11:40:46 +0100
Subject: [PATCH 7/7] [VPlan] Retrieve DL from Plan

---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  5 ++---
 llvm/lib/Transforms/Vectorize/VPlan.cpp       |  2 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 20 +++++++++----------
 .../Transforms/Vectorize/VPlanTransforms.h    |  5 ++---
 4 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fb5c75be62898..2dca2b322ee82 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7789,8 +7789,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
                            OrigLoop->getHeader()->getContext());
   VPlanTransforms::materializeBroadcasts(BestVPlan);
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
-  VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType(),
-                                   OrigLoop->getHeader()->getDataLayout());
+  VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
   VPlanTransforms::narrowInterleaveGroups(
       BestVPlan, BestVF,
       TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -9081,7 +9080,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       if (!HasScalarVF)
         VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
                                  *Plan, CM.getMinimalBitwidths());
-      VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
+      VPlanTransforms::optimize(*Plan);
       // TODO: try to put it close to addActiveLaneMask().
       // Discard the plan if it is not EVL-compatible
       if (CM.foldTailWithEVL() && !HasScalarVF &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index c676aeed1fc4b..90331a2ce2ce3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1550,7 +1550,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
     if (auto Plan = tryToBuildVPlan(SubRange)) {
-      VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
+      VPlanTransforms::optimize(*Plan);
       // Update the name of the latch of the top-level vector loop region region
       // after optimizations which includes block folding.
       Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e4155c7136bd3..5525da1b0e07c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -990,8 +990,7 @@ static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode,
 }
 
 /// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
-                           const DataLayout &DL) {
+static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   using namespace llvm::VPlanPatternMatch;
 
   // Constant folding.
@@ -999,6 +998,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
           .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
                 VPReplicateRecipe>([&](auto *I) {
             VPlan *Plan = R.getParent()->getPlan();
+            const DataLayout &DL =
+                Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
             Value *V = tryToConstantFold(*I, I->getOpcode(), I->operands(), DL,
                                          TypeInfo);
             if (V)
@@ -1140,14 +1141,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
   }
 }
 
-void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
-                                      const DataLayout &DL) {
+void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
       Plan.getEntry());
   VPTypeAnalysis TypeInfo(&CanonicalIVTy);
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      simplifyRecipe(R, TypeInfo, DL);
+      simplifyRecipe(R, TypeInfo);
     }
   }
 }
@@ -1398,8 +1398,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
 
     VPBlockUtils::connectBlocks(Preheader, Header);
     VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
-    VPlanTransforms::simplifyRecipes(Plan, *CanIVTy,
-                                     PSE.getSE()->getDataLayout());
+    VPlanTransforms::simplifyRecipes(Plan, *CanIVTy);
   } else {
     // The vector region contains header phis for which we cannot remove the
     // loop region yet.
@@ -1868,16 +1867,17 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
     VPBB->back().eraseFromParent();
   }
 }
-void VPlanTransforms::optimize(VPlan &Plan, const DataLayout &DL) {
+
+void VPlanTransforms::optimize(VPlan &Plan) {
   runPass(removeRedundantCanonicalIVs, Plan);
   runPass(removeRedundantInductionCasts, Plan);
 
-  runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
+  runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
   runPass(simplifyBlends, Plan);
   runPass(removeDeadRecipes, Plan);
   runPass(legalizeAndOptimizeInductions, Plan);
   runPass(removeRedundantExpandSCEVRecipes, Plan);
-  runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
+  runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
   runPass(removeBranchOnCondTrue, Plan);
   runPass(removeDeadRecipes, Plan);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 6bd69ac804f31..9e8b518a0c7eb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -109,7 +109,7 @@ struct VPlanTransforms {
   /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
   /// optimizations, dead recipe removal, replicate region optimizations and
   /// block merging.
-  static void optimize(VPlan &Plan, const DataLayout &DL);
+  static void optimize(VPlan &Plan);
 
   /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
   /// region block and remove the mask operand. Optimize the created regions by
@@ -190,8 +190,7 @@ struct VPlanTransforms {
 
   /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
   /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
-  static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
-                              const DataLayout &DL);
+  static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
 
   /// If there's a single exit block, optimize its phi recipes that use exiting
   /// IV values by feeding them precomputed end values instead, possibly taken



More information about the llvm-commits mailing list