[llvm] [LV][NFC] Auto-generate the test cases related to FindLastIV idioms. (PR #117560)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 06:03:08 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Mel Chen (Mel-Chen)
<details>
<summary>Changes</summary>
Pre-commit for #<!-- -->67812
---
Patch is 60.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117560.diff
4 Files Affected:
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll (+77-8)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+195-18)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll (+259-28)
- (modified) llvm/test/Transforms/LoopVectorize/select-min-index.ll (+220-24)
``````````diff
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
index 20710db19ba903..d3582ae16d1c18 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
@@ -1,8 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
-; CHECK-LABEL: define i64 @select_icmp_nuw_nsw
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i64 @select_icmp_nuw_nsw(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
@@ -25,8 +43,25 @@ exit: ; preds = %for.body
}
define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
-; CHECK-LABEL: define i64 @select_icmp_nsw
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i64 @select_icmp_nsw(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
@@ -49,8 +84,25 @@ exit: ; preds = %for.body
}
define i64 @select_icmp_nuw(ptr %a, ptr %b, i64 %ii, i64 %n) {
-; CHECK-LABEL: define i64 @select_icmp_nuw
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i64 @select_icmp_nuw(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
@@ -73,8 +125,25 @@ exit: ; preds = %for.body
}
define i64 @select_icmp_noflag(ptr %a, ptr %b, i64 %ii, i64 %n) {
-; CHECK-LABEL: define i64 @select_icmp_noflag
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i64 @select_icmp_noflag(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
+; CHECK-NEXT: [[INC]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
index b94e9f99868efd..2eb63db2b02474 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK
@@ -11,8 +12,31 @@
; %cmp.sgt = icmp sgt i32 %n, 0
; and successfully vectorize the case without a runtime-check.
define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) {
-; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP0]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT_LOOPEXIT]]:
+; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[RDX_LCSSA]]
;
entry:
%cmp.sgt = icmp sgt i32 %n, 0
@@ -48,8 +72,24 @@ exit: ; preds = %for.body, %entry
; %exitcond.not = icmp eq i64 %inc, 20000
; and successfully vectorize the case without a runtime-check.
define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) {
-; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP0]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
;
entry:
br label %for.body
@@ -73,8 +113,24 @@ exit: ; preds = %for.body
; Without loop guard, the maximum constant trip count that can be vectorized is
; the signed maximum value of reduction type.
define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
-; CHECK-LABEL: define i32 @select_fcmp_max_valid_const_ub
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i32 @select_fcmp_max_valid_const_ub(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
;
entry:
br label %for.body
@@ -105,8 +161,30 @@ exit: ; preds = %for.body
; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
; sentinel value), and need a runtime-check to vectorize this case.
define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr %a, i64 %n) {
-; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT_LOOPEXIT]]:
+; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[RDX_LCSSA]]
;
entry:
%cmp.sgt = icmp sgt i64 %n, 0
@@ -137,8 +215,31 @@ exit: ; preds = %for.body, %entry
; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
; sentinel value), and need a runtime-check to vectorize this case.
define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
-; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT_LOOPEXIT]]:
+; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[RDX_LCSSA]]
;
entry:
%cmp.not = icmp eq i32 %n, 0
@@ -173,8 +274,24 @@ exit: ; preds = %for.body, %entry
; Hence, the i32 will most certainly wrap and hit the sentinel value, and we
; cannot vectorize this case.
define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) {
-; CHECK-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
+; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
;
entry:
br label %for.body
@@ -198,8 +315,27 @@ exit: ; preds = %for.body
; Forbidding vectorization of the FindLastIV pattern involving a truncated
; induction variable in the absence of any loop guard.
define i32 @not_vectorized_select_iv_icmp_no_guard(ptr %a, ptr %b, i32 %start, i32 %n) {
-; CHECK-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret i32 [[COND_LCSSA]]
;
entry:
%wide.trip.count = zext i32 %n to i64
@@ -228,8 +364,24 @@ exit: ; preds = %for.body
; vectorizer is unable to guarantee that the induction variable is monotonic
; increasing.
define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
-; CHECK-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub
-; CHECK-NOT: vector.body:
+; CHECK-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/117560
More information about the llvm-commits
mailing list