[llvm] [VPlan] Introduce VPlanConstantFolder (PR #125365)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 2 09:17:54 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/125365
>From 4d7f9100ddd786802b2ef258bdd278614128168b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 27 Feb 2025 10:11:53 +0000
Subject: [PATCH 1/2] [LV] Pre-commit tests for const-folder
---
.../LoopVectorize/constantfolder.ll | 381 ++++++++++++++++++
1 file changed, 381 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/constantfolder.ll
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
new file mode 100644
index 0000000000000..a786a51c5c17e
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
@@ -0,0 +1,381 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
+
+define void @const_fold_ptradd(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_ptradd(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: store i16 0, ptr [[TMP0]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[CONST_0]]
+; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 true, label %loop.latch, label %else
+
+else:
+ br label %loop.latch
+
+loop.latch:
+ %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ]
+ %gep = getelementptr i16, ptr %dst, i64 %const.0
+ store i16 0, ptr %gep, align 2
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ult i64 %iv.next, 101
+ br i1 %cmp, label %loop.header, label %exit
+
+exit:
+ ret void
+}
+
+define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_inbounds_ptradd(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[PREDPHI]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: store i16 0, ptr [[TMP0]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[CONST_0]]
+; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 true, label %loop.latch, label %else
+
+else:
+ br label %loop.latch
+
+loop.latch:
+ %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ]
+ %gep = getelementptr inbounds i16, ptr %dst, i64 %const.0
+ store i16 0, ptr %gep, align 2
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ult i64 %iv.next, 101
+ br i1 %cmp, label %loop.header, label %exit
+
+exit:
+ ret void
+}
+
+define void @const_fold_select(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_select(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[PREDPHI]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
+; CHECK-NEXT: store i64 [[TMP3]], ptr [[DST]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[OR:%.*]] = or i64 [[D]], [[CONST_1]]
+; CHECK-NEXT: store i64 [[OR]], ptr [[DST]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 true, label %loop.latch, label %else
+
+else:
+ br label %loop.latch
+
+loop.latch:
+ %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ]
+ %or = or i64 %d, %const.1
+ store i64 %or, ptr %dst, align 8
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ult i64 %iv.next, 101
+ br i1 %cmp, label %loop.header, label %exit
+
+exit:
+ ret void
+}
+
+define void @const_fold_and_or_xor(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_and_or_xor(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> splat (i64 2), [[PREDPHI]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[TMP0]], [[PREDPHI]]
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[TMP1]], [[PREDPHI]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: store i64 [[TMP3]], ptr [[DST]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[OR:%.*]] = or i64 2, [[CONST_1]]
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[OR]], [[CONST_1]]
+; CHECK-NEXT: [[XOR:%.*]] = and i64 [[AND]], [[CONST_1]]
+; CHECK-NEXT: store i64 [[XOR]], ptr [[DST]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 true, label %loop.latch, label %else
+
+else:
+ br label %loop.latch
+
+loop.latch:
+ %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ]
+ %or = or i64 2, %const.1
+ %and = and i64 %or, %const.1
+ %xor = and i64 %and, %const.1
+ store i64 %xor, ptr %dst, align 8
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ult i64 %iv.next, 101
+ br i1 %cmp, label %loop.header, label %exit
+
+exit:
+ ret void
+}
+
+define void @const_fold_cmp_zext(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_cmp_zext(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i64> splat (i64 2), [[PREDPHI]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i8>
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
+; CHECK-NEXT: store i8 [[TMP2]], ptr [[DST]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[VAL:%.*]] = icmp ugt i64 2, [[CONST_1]]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[VAL]] to i8
+; CHECK-NEXT: store i8 [[ZEXT]], ptr [[DST]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 true, label %loop.latch, label %else
+
+else:
+ br label %loop.latch
+
+loop.latch:
+ %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ]
+ %val = icmp ugt i64 2, %const.1
+ %zext = zext i1 %val to i8
+ store i8 %zext, ptr %dst, align 1
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ult i64 %iv.next, 101
+ br i1 %cmp, label %loop.header, label %exit
+
+exit:
+ ret void
+}
+
+define void @const_fold_trunc(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_trunc(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> zeroinitializer, <4 x i64> [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i64> [[PREDPHI]] to <4 x i16>
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
+; CHECK-NEXT: store i16 [[TMP1]], ptr [[DST]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[CONST_0]] to i16
+; CHECK-NEXT: store i16 [[TRUNC]], ptr [[DST]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 true, label %loop.latch, label %else
+
+else:
+ br label %loop.latch
+
+loop.latch:
+ %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ]
+ %trunc = trunc i64 %const.0 to i16
+ store i16 %trunc, ptr %dst, align 2
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ult i64 %iv.next, 101
+ br i1 %cmp, label %loop.header, label %exit
+
+exit:
+ ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
+;.
>From 241c8a6b47ed10494355bb678c3ce23f58abb0d5 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sat, 1 Feb 2025 20:32:51 +0000
Subject: [PATCH 2/2] VPlan: implement VPlan-level constant-folding
Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan,
and use it in VPBuilder to constant-fold when all the underlying IR
values passed into the API are constants.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 5 +-
llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +-
.../Vectorize/VPlanConstantFolder.h | 100 +++++++++++
.../Transforms/Vectorize/VPlanPatternMatch.h | 21 ++-
.../Transforms/Vectorize/VPlanTransforms.cpp | 43 ++++-
.../Transforms/Vectorize/VPlanTransforms.h | 5 +-
...licate-recipe-with-only-first-lane-used.ll | 162 +++++++++++++++---
.../LoopVectorize/constantfolder.ll | 33 +---
8 files changed, 302 insertions(+), 69 deletions(-)
create mode 100644 llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 55cc801e91452..fa5bb59b861cc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7700,7 +7700,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
OrigLoop->getHeader()->getContext());
VPlanTransforms::materializeBroadcasts(BestVPlan);
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
- VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
+ VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType(),
+ OrigLoop->getHeader()->getDataLayout());
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -8986,7 +8987,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
if (!HasScalarVF)
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
*Plan, CM.getMinimalBitwidths());
- VPlanTransforms::optimize(*Plan);
+ VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
// TODO: try to put it close to addActiveLaneMask().
// Discard the plan if it is not EVL-compatible
if (CM.foldTailWithEVL() && !HasScalarVF &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1e2f70e5c103e..d1497cac17bd6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1531,7 +1531,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
if (auto Plan = tryToBuildVPlan(SubRange)) {
- VPlanTransforms::optimize(*Plan);
+ VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
// Update the name of the latch of the top-level vector loop region region
// after optimizations which includes block folding.
Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
new file mode 100644
index 0000000000000..8479eeb598589
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
@@ -0,0 +1,100 @@
+//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VPlanValue.h"
+#include "llvm/Analysis/TargetFolder.h"
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
+
+namespace llvm {
+class VPConstantFolder {
+private:
+ TargetFolder Folder;
+
+ Constant *getIRConstant(VPValue *V) const {
+ if (!V->isLiveIn())
+ return nullptr;
+ return dyn_cast_if_present<Constant>(V->getLiveInIRValue());
+ }
+
+ Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
+ VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return Folder.FoldBinOp(Opcode, LC, RC);
+ return nullptr;
+ }
+
+public:
+ VPConstantFolder(const DataLayout &DL) : Folder(DL) {}
+
+ Value *foldAnd(VPValue *LHS, VPValue *RHS) const {
+ return foldBinOp(Instruction::BinaryOps::And, LHS, RHS);
+ }
+
+ Value *foldOr(VPValue *LHS, VPValue *RHS) const {
+ return foldBinOp(Instruction::BinaryOps::Or, LHS, RHS);
+ }
+
+ Value *foldNot(VPValue *Op) const {
+ auto *C = getIRConstant(Op);
+ if (C)
+ return Folder.FoldBinOp(Instruction::BinaryOps::Xor, C,
+ Constant::getAllOnesValue(C->getType()));
+ return nullptr;
+ }
+
+ Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return Folder.FoldSelect(LC, RC,
+ ConstantInt::getNullValue(RC->getType()));
+ return nullptr;
+ }
+
+ Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
+ auto *CC = getIRConstant(Cond);
+ auto *TV = getIRConstant(TrueVal);
+ auto *FV = getIRConstant(FalseVal);
+ if (CC && TV && FV)
+ return Folder.FoldSelect(CC, TV, FV);
+ return nullptr;
+ }
+
+ Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return Folder.FoldCmp(Pred, LC, RC);
+ return nullptr;
+ }
+
+ Value *foldPtrAdd(VPValue *Base, VPValue *Offset, GEPNoWrapFlags NW) const {
+ auto *BC = getIRConstant(Base);
+ auto *OC = getIRConstant(Offset);
+ if (BC && OC) {
+ auto &Ctx = BC->getType()->getContext();
+ return Folder.FoldGEP(Type::getInt8Ty(Ctx), BC, OC, NW);
+ }
+ return nullptr;
+ }
+
+ Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *DestTy) const {
+ auto *C = getIRConstant(Op);
+ if (C)
+ return Folder.FoldCast(Opcode, C, DestTy);
+ return nullptr;
+ }
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 2cd23efcf3eab..ca5bceb63628a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -173,8 +173,8 @@ struct Recipe_match {
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
return false;
- assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
- "recipe with matched opcode the expected number of operands");
+ if (R->getNumOperands() != std::tuple_size<Ops_t>::value)
+ return false;
auto IdxSeq = std::make_index_sequence<std::tuple_size<Ops_t>::value>();
if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) {
@@ -353,6 +353,23 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Mul, Op0_t, Op1_t, true>(Op0, Op1);
}
+/// Match a binary AND operation. Note that while conceptually the operands can
+/// be matched commutatively, \p Commutative defaults to false in line with the
+/// IR-based pattern matching infrastructure. Use m_c_BinaryAnd for a
+/// commutative version of the matcher.
+template <typename Op0_t, typename Op1_t, bool Commutative = false>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, Commutative>
+m_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_Binary<Instruction::And, Op0_t, Op1_t, Commutative>(Op0, Op1);
+}
+
+template <typename Op0_t, typename Op1_t>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And,
+ /*Commutative*/ true>
+m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_BinaryAnd<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
+}
+
/// Match a binary OR operation. Note that while conceptually the operands can
/// be matched commutatively, \p Commutative defaults to false in line with the
/// IR-based pattern matching infrastructure. Use m_c_BinaryOr for a commutative
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9a041c83438dc..a2f72621e48a7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -16,6 +16,7 @@
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
+#include "VPlanConstantFolder.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "VPlanUtils.h"
@@ -923,7 +924,8 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}
/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
+ const DataLayout &DL) {
using namespace llvm::VPlanPatternMatch;
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
@@ -1053,7 +1055,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
// recipes to be visited during simplification.
- VPValue *X, *Y;
+ VPValue *X, *Y, *Z;
if (match(&R,
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) {
@@ -1076,15 +1078,39 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
TypeInfo.inferScalarType(R.getOperand(1)) ==
TypeInfo.inferScalarType(R.getVPSingleValue()))
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
+
+ // Constant folding. TODO: cast and cmp.
+ VPConstantFolder Folder(DL);
+ VPlan *Plan = R.getParent()->getPlan();
+ if (match(&R, m_BinaryAnd(m_VPValue(X), m_VPValue(Y))))
+ if (Value *V = Folder.foldAnd(X, Y))
+ R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
+ if (match(&R, m_BinaryOr(m_VPValue(X), m_VPValue(Y))))
+ if (Value *V = Folder.foldOr(X, Y))
+ R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
+ if (match(&R, m_Not(m_VPValue(X))))
+ if (Value *V = Folder.foldNot(X))
+ R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
+ if (match(&R, m_LogicalAnd(m_VPValue(X), m_VPValue(Y))))
+ if (Value *V = Folder.foldLogicalAnd(X, Y))
+ R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
+ if (match(&R, m_Select(m_VPValue(X), m_VPValue(Y), m_VPValue(Z))))
+ if (Value *V = Folder.foldSelect(X, Y, Z))
+ R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
+ if (match(&R, m_GetElementPtr(m_VPValue(X), m_VPValue(Y))))
+ if (Value *V = Folder.foldPtrAdd(
+ X, Y, cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags()))
+ R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
}
-void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
+void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
+ const DataLayout &DL) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan.getEntry());
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- simplifyRecipe(R, TypeInfo);
+ simplifyRecipe(R, TypeInfo, DL);
}
}
}
@@ -1256,7 +1282,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
VPBlockUtils::connectBlocks(Preheader, Header);
VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
- VPlanTransforms::simplifyRecipes(Plan, *CanIVTy);
+ VPlanTransforms::simplifyRecipes(Plan, *CanIVTy,
+ PSE.getSE()->getDataLayout());
} else {
// The vector region contains header phis for which we cannot remove the
// loop region yet.
@@ -1679,15 +1706,15 @@ void VPlanTransforms::truncateToMinimalBitwidths(
"some entries in MinBWs haven't been processed");
}
-void VPlanTransforms::optimize(VPlan &Plan) {
+void VPlanTransforms::optimize(VPlan &Plan, const DataLayout &DL) {
runPass(removeRedundantCanonicalIVs, Plan);
runPass(removeRedundantInductionCasts, Plan);
- runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
+ runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
runPass(removeDeadRecipes, Plan);
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
- runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
+ runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
runPass(removeDeadRecipes, Plan);
runPass(createAndOptimizeReplicateRegions, Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index c23ff38265670..0a1b4a11a0bcf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -101,7 +101,7 @@ struct VPlanTransforms {
/// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
/// optimizations, dead recipe removal, replicate region optimizations and
/// block merging.
- static void optimize(VPlan &Plan);
+ static void optimize(VPlan &Plan, const DataLayout &DL);
/// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
/// region block and remove the mask operand. Optimize the created regions by
@@ -181,7 +181,8 @@ struct VPlanTransforms {
/// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
- static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
+ static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
+ const DataLayout &DL);
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
index e6c9ce3381f73..85bcd97d50c7d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
@@ -12,60 +12,166 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 0
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[DIV_I:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ [[DIV_I]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[RETVAL_0_I]]
+; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %c = icmp eq i32 %x, 10
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 true, label %loop.latch, label %else
+
+else:
+ %div.i = udiv i64 99, %d
+ br label %loop.latch
+
+loop.latch:
+ %retval.0.i = phi i64 [ %div.i, %else ], [ 0, %loop.header ]
+ %gep = getelementptr i16, ptr %dst, i64 %retval.0.i
+ store i16 0, ptr %gep, align 2
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ult i64 %iv.next, 101
+ br i1 %cmp, label %loop.header, label %exit
+
+exit:
+ ret void
+}
+
+; Variant where ptradd cannot be const-folded.
+define void @replicate_udiv_with_only_first_lane_used2(i32 %x, ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @replicate_udiv_with_only_first_lane_used2(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE14:.*]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
; CHECK: [[PRED_UDIV_IF]]:
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE]]
; CHECK: [[PRED_UDIV_CONTINUE]]:
-; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP0]], %[[PRED_UDIV_IF]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_UDIV_IF]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
; CHECK: [[PRED_UDIV_IF1]]:
-; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP6]], i32 1
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE2]]
; CHECK: [[PRED_UDIV_CONTINUE2]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i64> [ [[TMP4]], %[[PRED_UDIV_CONTINUE]] ], [ [[TMP7]], %[[PRED_UDIV_IF1]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
; CHECK: [[PRED_UDIV_IF3]]:
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP10:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i64> [[TMP8]], i64 [[TMP10]], i32 2
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE4]]
; CHECK: [[PRED_UDIV_CONTINUE4]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
+; CHECK-NEXT: [[TMP49:%.*]] = phi <4 x i64> [ [[TMP8]], %[[PRED_UDIV_CONTINUE2]] ], [ [[TMP34]], %[[PRED_UDIV_IF3]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
; CHECK: [[PRED_UDIV_IF5]]:
-; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP14:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP49]], i64 [[TMP14]], i32 3
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE6]]
; CHECK: [[PRED_UDIV_CONTINUE6]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
+; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i64> [ [[TMP49]], %[[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], %[[PRED_UDIV_IF5]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
; CHECK: [[PRED_UDIV_IF7]]:
-; CHECK-NEXT: [[TMP5:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP18:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP18]], i32 0
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE8]]
; CHECK: [[PRED_UDIV_CONTINUE8]]:
-; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP5]], %[[PRED_UDIV_IF7]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
+; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i64> [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP19]], %[[PRED_UDIV_IF7]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
; CHECK: [[PRED_UDIV_IF9]]:
-; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP22:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP22]], i32 1
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE10]]
; CHECK: [[PRED_UDIV_CONTINUE10]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
+; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i64> [ [[TMP20]], %[[PRED_UDIV_CONTINUE8]] ], [ [[TMP23]], %[[PRED_UDIV_IF9]] ]
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
; CHECK: [[PRED_UDIV_IF11]]:
-; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP26:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i64> [[TMP24]], i64 [[TMP26]], i32 2
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE12]]
; CHECK: [[PRED_UDIV_CONTINUE12]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
+; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i64> [ [[TMP24]], %[[PRED_UDIV_CONTINUE10]] ], [ [[TMP27]], %[[PRED_UDIV_IF11]] ]
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP29]], label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
; CHECK: [[PRED_UDIV_IF13]]:
-; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP30:%.*]] = udiv i64 99, [[D]]
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i64> [[TMP28]], i64 [[TMP30]], i32 3
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE14]]
; CHECK: [[PRED_UDIV_CONTINUE14]]:
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[PREDPHI15:%.*]] = select i1 true, i64 0, i64 [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI15]]
-; CHECK-NEXT: store i16 0, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i64> [ [[TMP28]], %[[PRED_UDIV_CONTINUE12]] ], [ [[TMP31]], %[[PRED_UDIV_IF13]] ]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP16]]
+; CHECK-NEXT: [[PREDPHI15:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP32]]
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP33]]
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 1
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP35]]
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 2
+; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP37]]
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 3
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP39]]
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 0
+; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP41]]
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 1
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP43]]
+; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 2
+; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP45]]
+; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 3
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP47]]
; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2
+; CHECK-NEXT: store i16 0, ptr [[TMP36]], align 2
+; CHECK-NEXT: store i16 0, ptr [[TMP38]], align 2
+; CHECK-NEXT: store i16 0, ptr [[TMP40]], align 2
+; CHECK-NEXT: store i16 0, ptr [[TMP42]], align 2
+; CHECK-NEXT: store i16 0, ptr [[TMP44]], align 2
+; CHECK-NEXT: store i16 0, ptr [[TMP46]], align 2
+; CHECK-NEXT: store i16 0, ptr [[TMP48]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
-; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -73,7 +179,7 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[DIV_I:%.*]] = udiv i64 99, [[D]]
; CHECK-NEXT: br label %[[LOOP_LATCH]]
@@ -83,7 +189,7 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101
-; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -93,7 +199,7 @@ entry:
loop.header: ; preds = %loop.latch, %entry
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
- br i1 true, label %loop.latch, label %else
+ br i1 %c, label %loop.latch, label %else
else:
%div.i = udiv i64 99, %d
@@ -115,4 +221,6 @@ exit:
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
index a786a51c5c17e..83497122b3e9b 100644
--- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
@@ -7,8 +7,7 @@ define void @const_fold_ptradd(ptr %dst, i64 %d) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 0
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -64,8 +63,7 @@ define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[PREDPHI]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 0
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -123,8 +121,7 @@ define void @const_fold_select(ptr %dst, i64 %d) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[PREDPHI]]
+; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], splat (i64 1)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -181,17 +178,10 @@ define void @const_fold_and_or_xor(ptr %dst, i64 %d) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> splat (i64 2), [[PREDPHI]]
-; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[TMP0]], [[PREDPHI]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[TMP1]], [[PREDPHI]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: store i64 [[TMP3]], ptr [[DST]], align 8
+; CHECK-NEXT: store i64 1, ptr [[DST]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -247,16 +237,10 @@ define void @const_fold_cmp_zext(ptr %dst, i64 %d) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i64> splat (i64 2), [[PREDPHI]]
-; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i8>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
-; CHECK-NEXT: store i8 [[TMP2]], ptr [[DST]], align 1
+; CHECK-NEXT: store i8 1, ptr [[DST]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -310,15 +294,10 @@ define void @const_fold_trunc(ptr %dst, i64 %d) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> zeroinitializer, <4 x i64> [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i64> [[PREDPHI]] to <4 x i16>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
-; CHECK-NEXT: store i16 [[TMP1]], ptr [[DST]], align 2
+; CHECK-NEXT: store i16 0, ptr [[DST]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
More information about the llvm-commits
mailing list