[llvm] f9c9a32 - [LV] Add tests with fmax reductions without fast-math flags.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 22 12:36:05 PDT 2025
Author: Florian Hahn
Date: 2025-06-22T20:35:38+01:00
New Revision: f9c9a32e9cbe3547631db4c55596bdfde76c44dc
URL: https://github.com/llvm/llvm-project/commit/f9c9a32e9cbe3547631db4c55596bdfde76c44dc
DIFF: https://github.com/llvm/llvm-project/commit/f9c9a32e9cbe3547631db4c55596bdfde76c44dc.diff
LOG: [LV] Add tests with fmax reductions without fast-math flags.
Adds extra tests with fmax reductions without fast-math flags for
upcoming patches.
Added:
llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
new file mode 100644
index 0000000000000..77b40dabae1e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -mtriple=arm64-apple-macosx -S %s | FileCheck %s
+
+define float @fmax_ugt_with_select(ptr %src, i64 %n) { ; max reduction over %src written as fcmp ugt + select, no fast-math flags; the assertions below expect the loop to stay scalar
+; CHECK-LABEL: define float @fmax_ugt_with_select(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
+; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %cmp = fcmp ugt float %l, %max ; ugt is an unordered predicate: also true when either operand is NaN
+ %max.next = select i1 %cmp, float %l, float %max ; take the new element whenever the compare holds
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmaxnum(ptr %src, i64 %n) { ; max reduction over %src through llvm.maxnum.f32 with no fast-math flags on the call; the assertions below expect the loop to stay scalar
+; CHECK-LABEL: define float @fmaxnum(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[MAX]], float [[L]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %max.next = call float @llvm.maxnum.f32(float %max, float %l) ; accumulator is the first operand, loaded element the second
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
new file mode 100644
index 0000000000000..fb68d4cbd9e4b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
+
+define float @fmax_ugt_with_select(ptr %src, i64 %n) { ; fcmp ugt + select max reduction without fast-math flags; this file forces vector width 4 and interleave 2, yet the assertions below still expect a scalar loop
+; CHECK-LABEL: define float @fmax_ugt_with_select(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
+; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %cmp = fcmp ugt float %l, %max ; ugt is an unordered predicate: also true when either operand is NaN
+ %max.next = select i1 %cmp, float %l, float %max ; take the new element whenever the compare holds
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmaxnum(ptr %src, i64 %n) { ; llvm.maxnum.f32 max reduction without fast-math flags; this file forces vector width 4 and interleave 2, yet the assertions below still expect a scalar loop
+; CHECK-LABEL: define float @fmaxnum(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[MAX]], float [[L]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %max.next = call float @llvm.maxnum.f32(float %max, float %l) ; accumulator is the first operand, loaded element the second
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
new file mode 100644
index 0000000000000..3a8ef7e0b08c0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
@@ -0,0 +1,361 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
+
+define float @fmax_ugt_with_select_1(ptr %src, i64 %n) { ; fcmp ugt + select max reduction, no fast-math flags, loaded element as the first compare operand; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmax_ugt_with_select_1(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
+; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %cmp = fcmp ugt float %l, %max ; unordered predicate: also true when either operand is NaN
+ %max.next = select i1 %cmp, float %l, float %max ; take the new element whenever the compare holds
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmax_ugt_with_select_2(ptr %src, i64 %n) { ; same shape as the _1 variant but with the accumulator as the first compare/select operand; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmax_ugt_with_select_2(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[MAX]], [[L]]
+; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[MAX]], float [[L]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %cmp = fcmp ugt float %max, %l ; unordered predicate: also true when either operand is NaN
+ %max.next = select i1 %cmp, float %max, float %l ; keep the accumulator whenever the compare holds
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmax_ogt_with_select_1(ptr %src, i64 %n) { ; fcmp ogt + select max reduction, no fast-math flags, loaded element as the first compare operand; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmax_ogt_with_select_1(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[L]], [[MAX]]
+; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %cmp = fcmp ogt float %l, %max ; ordered predicate: false when either operand is NaN
+ %max.next = select i1 %cmp, float %l, float %max ; take the new element whenever the compare holds
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmax_ogt_with_select_2(ptr %src, i64 %n) { ; same shape as the ogt _1 variant but with the accumulator as the first compare/select operand; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmax_ogt_with_select_2(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[MAX]], [[L]]
+; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[MAX]], float [[L]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %cmp = fcmp ogt float %max, %l ; ordered predicate: false when either operand is NaN
+ %max.next = select i1 %cmp, float %max, float %l ; keep the accumulator whenever the compare holds
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmax_ugt_with_select_store_result(ptr %src, ptr %dst, i64 %n) { ; fcmp ugt + select max reduction whose running value is also stored to %dst on every iteration; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmax_ugt_with_select_store_result(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
+; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
+; CHECK-NEXT: store float [[MAX_NEXT]], ptr [[DST]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %cmp = fcmp ugt float %l, %max ; unordered predicate: also true when either operand is NaN
+ %max.next = select i1 %cmp, float %l, float %max ; take the new element whenever the compare holds
+ store float %max.next, ptr %dst, align 8 ; same address every iteration: %dst is not indexed by %iv
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmaxnum_1(ptr %src, i64 %n) { ; llvm.maxnum.f32 max reduction, no fast-math flags, loaded element passed as the first argument; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmaxnum_1(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[L]], float [[MAX]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %max.next = call float @llvm.maxnum.f32(float %l, float %max) ; loaded element is the first operand, accumulator the second
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmaxnum_2(ptr %src, i64 %n) { ; llvm.maxnum.f32 max reduction, no fast-math flags, accumulator passed as the first argument; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmaxnum_2(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[MAX]], float [[L]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %max.next = call float @llvm.maxnum.f32(float %max, float %l) ; accumulator is the first operand, loaded element the second
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
+
+define float @fmax_select_with_blend(ptr %A, ptr %B) { ; fcmp ogt + select max over %B that is only updated on iterations where the i32 loaded from %A is zero; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmax_select_with_blend(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[C_1:%.*]] = icmp eq i32 [[L_A]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_THEN:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_THEN]]:
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[C_2:%.*]] = fcmp ogt float [[MAX]], [[L]]
+; CHECK-NEXT: [[MAX_SEL:%.*]] = select i1 [[C_2]], float [[MAX]], float [[L]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[MAX_NEXT]] = phi float [ [[MAX_SEL]], %[[LOOP_THEN]] ], [ [[MAX]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %max = phi float [ 0.000000e+00, %entry ], [ %max.next, %loop.latch ] ; running maximum, seeded with 0.0
+ %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A
+ %c.1 = icmp eq i32 %l.A, 0
+ br i1 %c.1, label %loop.then, label %loop.latch ; skip the update unless the loaded i32 is zero
+
+loop.then:
+ %gep.B = getelementptr inbounds float, ptr %B, i64 %iv
+ %l = load float, ptr %gep.B
+ %c.2 = fcmp ogt float %max, %l ; ordered predicate: false when either operand is NaN
+ %max.sel = select i1 %c.2, float %max, float %l ; keep the accumulator whenever the compare holds
+ br label %loop.latch
+
+loop.latch:
+ %max.next = phi float [ %max.sel, %loop.then ], [ %max, %loop ] ; merge: updated value from loop.then, otherwise the unchanged accumulator
+ %iv.next = add i64 %iv, 1
+ %ec = icmp ne i64 %iv.next, 1000
+ br i1 %ec, label %loop, label %exit ; constant bound: leave once %iv.next reaches 1000
+
+exit:
+ ret float %max.next
+}
+
+define float @fmax_with_select_and_load_store(ptr %src, ptr noalias %dst, i64 %n) { ; fcmp ugt + select max reduction with an i32 copy within %dst placed between the compare and the select; the assertions below expect a scalar loop
+; CHECK-LABEL: define float @fmax_with_select_and_load_store(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[L]], [[MAX]]
+; CHECK-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV_1]]
+; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_DST_1]], align 4
+; CHECK-NEXT: [[GEP_DST_0:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[L_2]], ptr [[GEP_DST_0]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = select i1 [[CMP]], float [[L]], float [[MAX]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] ; running maximum, seeded with -1.0e7
+ %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %cmp = fcmp ugt float %l, %max ; unordered predicate: also true when either operand is NaN
+ %iv.1 = add i64 %iv, 1
+ %gep.dst.1 = getelementptr inbounds i32, ptr %dst, i64 %iv.1
+ %l.2 = load i32, ptr %gep.dst.1 ; reads the i32 slot one past the slot stored below
+ %gep.dst.0 = getelementptr inbounds i32, ptr %dst, i64 %iv
+ store i32 %l.2, ptr %gep.dst.0 ; copies that element down into slot %iv; the value does not feed the reduction
+ %max.next = select i1 %cmp, float %l, float %max ; the select consuming %cmp comes only after the memory operations
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop ; leave once %iv.next reaches %n
+
+exit:
+ ret float %max.next
+}
More information about the llvm-commits
mailing list