[llvm] LLVM/Test: Add vectorization test cases for fminimumnum and fmaximumnum (PR #133843)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 31 20:11:09 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

Author: YunQiang Su (wzssyqa)

Changes:

Vectorization of fmaximumnum and fminimumnum is not supported yet. Add test cases for them now; the checks will be updated once support lands.
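
For reference, the tests exercise the scalar `llvm.minimumnum`/`llvm.maximumnum` intrinsics inside simple 4096-iteration loops. A minimal sketch of the vector form the FIXME comments anticipate is shown below; the function name and the fixed VF of 4 are illustrative only, and the actual shape will depend on the target and vectorizer:

```llvm
; Hypothetical vectorized loop body (names and VF chosen for illustration only):
define void @f32min_v4(ptr %pa, ptr %pb, ptr %pc) {
entry:
  %va = load <4 x float>, ptr %pa, align 4
  %vb = load <4 x float>, ptr %pb, align 4
  ; llvm.minimumnum is overloaded, so the <4 x float> form uses the .v4f32 mangling.
  %vr = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %va, <4 x float> %vb)
  store <4 x float> %vr, ptr %pc, align 4
  ret void
}

declare <4 x float> @llvm.minimumnum.v4f32(<4 x float>, <4 x float>)
```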

---

Patch is 176.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133843.diff


6 Files Affected:

- (added) llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll (+265) 
- (added) llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll (+265) 
- (added) llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll (+259) 
- (added) llvm/test/Transforms/SLPVectorizer/AArch64/fminimumnum.ll (+516) 
- (added) llvm/test/Transforms/SLPVectorizer/RISCV/fminimumnum.ll (+516) 
- (added) llvm/test/Transforms/SLPVectorizer/X86/fminimumnum.ll (+510) 
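
To run the new tests locally, one option (assuming a configured build tree at `build/` with tests enabled; the path is an assumption) is to invoke llvm-lit directly:

```
build/bin/llvm-lit -v \
  llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll \
  llvm/test/Transforms/SLPVectorizer/AArch64/fminimumnum.ll
```

The check lines were generated with `utils/update_test_checks.py`, as noted in the test headers, so they can be regenerated the same way once vectorization support is added.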


``````````diff
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll
new file mode 100644
index 0000000000000..16968b3d11420
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll
@@ -0,0 +1,265 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; FIXME: fmaximumnum/fminimumnum have no vectorizing support yet.
+; RUN: opt --passes=loop-vectorize --mtriple=aarch64 -mattr="+neon" -S < %s | FileCheck %s --check-prefix=ARM64
+
+@input1_f32 = global [4096 x float] zeroinitializer, align 4
+@input2_f32 = global [4096 x float] zeroinitializer, align 4
+@output_f32 = global [4096 x float] zeroinitializer, align 4
+@input1_f64 = global [4096 x double] zeroinitializer, align 8
+@input2_f64 = global [4096 x double] zeroinitializer, align 8
+@output_f64 = global [4096 x double] zeroinitializer, align 8
+@input1_f16 = global [4096 x half] zeroinitializer, align 2
+@input2_f16 = global [4096 x half] zeroinitializer, align 2
+@output_f16 = global [4096 x half] zeroinitializer, align 2
+
+define void @f32min()  {
+; ARM64-LABEL: define void @f32min(
+; ARM64-SAME: ) #[[ATTR0:[0-9]+]] {
+; ARM64-NEXT:  [[ENTRY:.*]]:
+; ARM64-NEXT:    br label %[[FOR_BODY:.*]]
+; ARM64:       [[FOR_COND_CLEANUP:.*]]:
+; ARM64-NEXT:    ret void
+; ARM64:       [[FOR_BODY]]:
+; ARM64-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; ARM64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @input1_f32, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; ARM64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @input2_f32, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; ARM64-NEXT:    [[TMP14:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP12]], float [[TMP13]])
+; ARM64-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @output_f32, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    store float [[TMP14]], ptr [[ARRAYIDX4]], align 4
+; ARM64-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ARM64-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
+; ARM64-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw [4096 x float], ptr @input1_f32, i64 0, i64 %indvars.iv
+  %input1 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr @input2_f32, i64 0, i64 %indvars.iv
+  %input2 = load float, ptr %arrayidx2, align 4
+  %output = tail call float @llvm.minimumnum.f32(float %input1, float %input2)
+  %arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr @output_f32, i64 0, i64 %indvars.iv
+  store float %output, ptr %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4096
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare float @llvm.minimumnum.f32(float, float)
+
+define void @f32max()  {
+; ARM64-LABEL: define void @f32max(
+; ARM64-SAME: ) #[[ATTR0]] {
+; ARM64-NEXT:  [[ENTRY:.*]]:
+; ARM64-NEXT:    br label %[[FOR_BODY:.*]]
+; ARM64:       [[FOR_COND_CLEANUP:.*]]:
+; ARM64-NEXT:    ret void
+; ARM64:       [[FOR_BODY]]:
+; ARM64-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; ARM64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @input1_f32, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; ARM64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @input2_f32, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; ARM64-NEXT:    [[TMP14:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP12]], float [[TMP13]])
+; ARM64-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @output_f32, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    store float [[TMP14]], ptr [[ARRAYIDX4]], align 4
+; ARM64-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ARM64-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
+; ARM64-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw [4096 x float], ptr @input1_f32, i64 0, i64 %indvars.iv
+  %input1 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr @input2_f32, i64 0, i64 %indvars.iv
+  %input2 = load float, ptr %arrayidx2, align 4
+  %output = tail call float @llvm.maximumnum.f32(float %input1, float %input2)
+  %arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr @output_f32, i64 0, i64 %indvars.iv
+  store float %output, ptr %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4096
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare float @llvm.maximumnum.f32(float, float)
+
+define void @f64min()  {
+; ARM64-LABEL: define void @f64min(
+; ARM64-SAME: ) #[[ATTR0]] {
+; ARM64-NEXT:  [[ENTRY:.*]]:
+; ARM64-NEXT:    br label %[[FOR_BODY:.*]]
+; ARM64:       [[FOR_COND_CLEANUP:.*]]:
+; ARM64-NEXT:    ret void
+; ARM64:       [[FOR_BODY]]:
+; ARM64-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; ARM64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @input1_f64, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; ARM64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @input2_f64, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP13:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
+; ARM64-NEXT:    [[TMP14:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP12]], double [[TMP13]])
+; ARM64-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @output_f64, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    store double [[TMP14]], ptr [[ARRAYIDX4]], align 8
+; ARM64-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ARM64-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
+; ARM64-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw [4096 x double], ptr @input1_f64, i64 0, i64 %indvars.iv
+  %input1 = load double, ptr %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds nuw [4096 x double], ptr @input2_f64, i64 0, i64 %indvars.iv
+  %input2 = load double, ptr %arrayidx2, align 8
+  %output = tail call double @llvm.minimumnum.f64(double %input1, double %input2)
+  %arrayidx4 = getelementptr inbounds nuw [4096 x double], ptr @output_f64, i64 0, i64 %indvars.iv
+  store double %output, ptr %arrayidx4, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4096
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare double @llvm.minimumnum.f64(double, double)
+
+define void @f64max()  {
+; ARM64-LABEL: define void @f64max(
+; ARM64-SAME: ) #[[ATTR0]] {
+; ARM64-NEXT:  [[ENTRY:.*]]:
+; ARM64-NEXT:    br label %[[FOR_BODY:.*]]
+; ARM64:       [[FOR_COND_CLEANUP:.*]]:
+; ARM64-NEXT:    ret void
+; ARM64:       [[FOR_BODY]]:
+; ARM64-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; ARM64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @input1_f64, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; ARM64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @input2_f64, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP13:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
+; ARM64-NEXT:    [[TMP14:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP12]], double [[TMP13]])
+; ARM64-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @output_f64, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    store double [[TMP14]], ptr [[ARRAYIDX4]], align 8
+; ARM64-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ARM64-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
+; ARM64-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw [4096 x double], ptr @input1_f64, i64 0, i64 %indvars.iv
+  %input1 = load double, ptr %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds nuw [4096 x double], ptr @input2_f64, i64 0, i64 %indvars.iv
+  %input2 = load double, ptr %arrayidx2, align 8
+  %output = tail call double @llvm.maximumnum.f64(double %input1, double %input2)
+  %arrayidx4 = getelementptr inbounds nuw [4096 x double], ptr @output_f64, i64 0, i64 %indvars.iv
+  store double %output, ptr %arrayidx4, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4096
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare double @llvm.maximumnum.f64(double, double)
+
+define void @f16min()  {
+; ARM64-LABEL: define void @f16min(
+; ARM64-SAME: ) #[[ATTR0]] {
+; ARM64-NEXT:  [[ENTRY:.*]]:
+; ARM64-NEXT:    br label %[[FOR_BODY:.*]]
+; ARM64:       [[FOR_COND_CLEANUP:.*]]:
+; ARM64-NEXT:    ret void
+; ARM64:       [[FOR_BODY]]:
+; ARM64-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; ARM64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @input1_f16, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP8:%.*]] = load half, ptr [[ARRAYIDX]], align 2
+; ARM64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @input2_f16, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP9:%.*]] = load half, ptr [[ARRAYIDX2]], align 2
+; ARM64-NEXT:    [[TMP10:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP8]], half [[TMP9]])
+; ARM64-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @output_f16, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    store half [[TMP10]], ptr [[ARRAYIDX4]], align 2
+; ARM64-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ARM64-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
+; ARM64-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw [4096 x half], ptr @input1_f16, i64 0, i64 %indvars.iv
+  %input1 = load half, ptr %arrayidx, align 2
+  %arrayidx2 = getelementptr inbounds nuw [4096 x half], ptr @input2_f16, i64 0, i64 %indvars.iv
+  %input2 = load half, ptr %arrayidx2, align 2
+  %output = tail call half @llvm.minimumnum.f16(half %input1, half %input2)
+  %arrayidx4 = getelementptr inbounds nuw [4096 x half], ptr @output_f16, i64 0, i64 %indvars.iv
+  store half %output, ptr %arrayidx4, align 2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4096
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare half @llvm.minimumnum.f16(half, half)
+
+define void @f16max()  {
+; ARM64-LABEL: define void @f16max(
+; ARM64-SAME: ) #[[ATTR0]] {
+; ARM64-NEXT:  [[ENTRY:.*]]:
+; ARM64-NEXT:    br label %[[FOR_BODY:.*]]
+; ARM64:       [[FOR_COND_CLEANUP:.*]]:
+; ARM64-NEXT:    ret void
+; ARM64:       [[FOR_BODY]]:
+; ARM64-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; ARM64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @input1_f16, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP8:%.*]] = load half, ptr [[ARRAYIDX]], align 2
+; ARM64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @input2_f16, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    [[TMP9:%.*]] = load half, ptr [[ARRAYIDX2]], align 2
+; ARM64-NEXT:    [[TMP10:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP8]], half [[TMP9]])
+; ARM64-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @output_f16, i64 0, i64 [[INDVARS_IV]]
+; ARM64-NEXT:    store half [[TMP10]], ptr [[ARRAYIDX4]], align 2
+; ARM64-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; ARM64-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
+; ARM64-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw [4096 x half], ptr @input1_f16, i64 0, i64 %indvars.iv
+  %input1 = load half, ptr %arrayidx, align 2
+  %arrayidx2 = getelementptr inbounds nuw [4096 x half], ptr @input2_f16, i64 0, i64 %indvars.iv
+  %input2 = load half, ptr %arrayidx2, align 2
+  %output = tail call half @llvm.maximumnum.f16(half %input1, half %input2)
+  %arrayidx4 = getelementptr inbounds nuw [4096 x half], ptr @output_f16, i64 0, i64 %indvars.iv
+  store half %output, ptr %arrayidx4, align 2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4096
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare half @llvm.maximumnum.f16(half, half)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
new file mode 100644
index 0000000000000..1ff43856a8bc1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
@@ -0,0 +1,265 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; FIXME: fmaximumnum/fminimumnum have no vectorizing support yet.
+; RUN: opt --passes=loop-vectorize --mtriple=riscv64 -mattr="+zvfh,+v,+zfh" -S < %s | FileCheck %s --check-prefix=RV64
+
+@input1_f32 = global [4096 x float] zeroinitializer, align 4
+@input2_f32 = global [4096 x float] zeroinitializer, align 4
+@output_f32 = global [4096 x float] zeroinitializer, align 4
+@input1_f64 = global [4096 x double] zeroinitializer, align 8
+@input2_f64 = global [4096 x double] zeroinitializer, align 8
+@output_f64 = global [4096 x double] zeroinitializer, align 8
+@input1_f16 = global [4096 x half] zeroinitializer, align 2
+@input2_f16 = global [4096 x half] zeroinitializer, align 2
+@output_f16 = global [4096 x half] zeroinitializer, align 2
+
+define void @f32min()  {
+; RV64-LABEL: define void @f32min(
+; RV64-SAME: ) #[[ATTR0:[0-9]+]] {
+; RV64-NEXT:  [[ENTRY:.*]]:
+; RV64-NEXT:    br label %[[FOR_BODY:.*]]
+; RV64:       [[FOR_COND_CLEANUP:.*]]:
+; RV64-NEXT:    ret void
+; RV64:       [[FOR_BODY]]:
+; RV64-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; RV64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @input1_f32, i64 0, i64 [[INDVARS_IV]]
+; RV64-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; RV64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @input2_f32, i64 0, i64 [[INDVARS_IV]]
+; RV64-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; RV64-NEXT:    [[TMP16:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP14]], float [[TMP15]])
+; RV64-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @output_f32, i64 0, i64 [[INDVARS_IV]]
+; RV64-NEXT:    store float [[TMP16]], ptr [[ARRAYIDX4]], align 4
+; RV64-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; RV64-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
+; RV64-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw [4096 x float], ptr @input1_f32, i64 0, i64 %indvars.iv
+  %input1 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr @input2_f32, i64 0, i64 %indvars.iv
+  %input2 = load float, ptr %arrayidx2, align 4
+  %output = tail call float @llvm.minimumnum.f32(float %input1, float %input2)
+  %arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr @output_f32, i64 0, i64 %indvars.iv
+  store float %output, ptr %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4096
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare float @llvm.minimumnum.f32(float, float)
+
+define void @f32max()  {
+; RV64-LABEL: define void @f32max(
+; RV64-SAME: ) #[[ATTR0]] {
+; RV64-NEXT:  [[ENTRY:.*]]:
+; RV64-NEXT:    br label %[[FOR_BODY:.*]]
+; RV64:       [[FOR_COND_CLEANUP:.*]]:
+; RV64-NEXT:    ret void
+; RV64:       [[FOR_BODY]]:
+; RV64-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; RV64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @input1_f32, i64 0, i64 [[INDVARS_IV]]
+; RV64-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; RV64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @input2_f32, i64 0, i64 [[INDVARS_IV]]
+; RV64-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; RV64-NEXT:    [[TMP16:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP14]], float [[TMP15]])
+; RV64-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @output_f32, i64 0, i64 [[INDVARS_IV]]
+; RV64-NEXT:    store float [[TMP16]], ptr [[ARRAYIDX4]], align 4
+; RV64-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; RV64-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
+; RV64-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw [4096 x float], ptr @input1_f32, i64 0, i64 %indvars.iv
+  %input1 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr @input2_f32, i64 0, i64 %indvars.iv
+  %input2 = load float, ptr %arrayidx2, align 4
+  %output = tail call float @llvm.maximumnum.f32(float %input1, float %input2)
+  %arrayidx4 = get...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/133843


More information about the llvm-commits mailing list