[llvm] [SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder (PR #93575)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 29 08:42:03 PDT 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/93575
From fadab92bf4ca0e00cf36431a96d4bf376bcf3bff Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 28 May 2024 23:45:55 +0800
Subject: [PATCH 1/3] [RISCV][SDAG] Add pre-commit tests for PR93414. NFC.
---
llvm/test/CodeGen/RISCV/float-select-fcmp.ll | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
index a2ff0d33e2d31..159c9a21cd413 100644
--- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
@@ -451,3 +451,21 @@ define signext i32 @select_fcmp_uge_1_2(float %a, float %b) nounwind {
%2 = select i1 %1, i32 1, i32 2
ret i32 %2
}
+
+; Test from PR93414
+; Make sure that we don't use fmin.s here to handle signed zero correctly.
+define float @select_fcmp_olt_pos_zero(float %x) {
+; CHECK-LABEL: select_fcmp_olt_pos_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.w.x fa5, zero
+; CHECK-NEXT: fmin.s fa0, fa0, fa5
+; CHECK-NEXT: ret
+;
+; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero:
+; CHECKZFINX: # %bb.0:
+; CHECKZFINX-NEXT: fmin.s a0, a0, zero
+; CHECKZFINX-NEXT: ret
+ %cmp = fcmp olt float %x, 0.000000
+ %sel = select i1 %cmp, float %x, float 0.000000
+ ret float %sel
+}
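
For context, the miscompile this test pins down comes from the signed-zero
semantics of fcmp versus fmin: an ordered compare treats -0.0 and +0.0 as
equal, while RISC-V fmin.s orders -0.0 below +0.0. A minimal standalone C++
sketch of the mismatch (illustrative only, not part of the patch):

    // For x == -0.0f, (x < 0.0f) is false (IEEE compares -0 == +0), so the
    // select form yields +0.0f, while a hardware fmin such as RISC-V fmin.s
    // yields -0.0f. std::fmin may return either zero depending on the libm.
    #include <cmath>
    #include <cstdio>

    static float select_form(float x) { return x < 0.0f ? x : 0.0f; }

    int main() {
      float x = -0.0f;
      float s = select_form(x);
      float m = std::fmin(x, 0.0f);
      std::printf("select: %c0.0  fmin: %c0.0\n",
                  std::signbit(s) ? '-' : '+', std::signbit(m) ? '-' : '+');
      return 0;
    }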
From eeda1424078e1800213a003153d2d17da799f22a Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 29 May 2024 00:27:26 +0800
Subject: [PATCH 2/3] [SDAG] Drop select -> fmax/min folding in SDAGBuilder
---
.../SelectionDAG/SelectionDAGBuilder.cpp | 28 ++-----------------
llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll | 5 ++--
llvm/test/CodeGen/AArch64/arm64-fmax.ll | 9 ++++--
llvm/test/CodeGen/AArch64/select_fmf.ll | 25 +++++++++--------
.../test/CodeGen/AArch64/sve-pred-selectop.ll | 28 +++++++++++--------
llvm/test/CodeGen/RISCV/float-select-fcmp.ll | 12 ++++++--
6 files changed, 50 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ca352da5d36eb..799f748fb786e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3725,32 +3725,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
- case SPF_SMIN: Opc = ISD::SMIN; break;
- case SPF_FMINNUM:
- switch (SPR.NaNBehavior) {
- case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
- case SPNB_RETURNS_ANY:
- if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
- Opc = ISD::FMINNUM;
- break;
- }
- break;
- case SPF_FMAXNUM:
- switch (SPR.NaNBehavior) {
- case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
- case SPNB_RETURNS_ANY:
- if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
- Opc = ISD::FMAXNUM;
- break;
- }
+ case SPF_SMIN:
+ Opc = ISD::SMIN;
break;
case SPF_NABS:
Negate = true;
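
The block deleted above is the builder-time shortcut that turned a
select+setcc pair into FMINNUM/FMAXNUM based on matchSelectPattern's NaN
classification alone; it never accounted for signed zeros, which is what
PR93414 trips over. FP min/max formation is now left to later DAG combines.
A rough summary of what a sound fold has to check (illustrative pseudologic
with invented names, not LLVM's actual interface):

    #include <cstdio>

    // Invented flag bundle for this sketch only.
    struct Flags { bool NoNaNs; bool NoSignedZeros; };

    // select (fcmp olt x, y), x, y ==> fminnum(x, y) needs both flags:
    //  - without nnan, a NaN y makes fcmp olt false, so the select yields
    //    the NaN, while fminnum would yield the non-NaN operand x;
    //  - without nsz, fcmp equates -0.0 and +0.0, but fminnum-style ops
    //    may distinguish them (see the RISC-V test in patch 1).
    static bool canFoldSelectToFMinNum(Flags F) {
      return F.NoNaNs && F.NoSignedZeros;
    }

    int main() {
      std::printf("nnan only: %s\n",
                  canFoldSelectToFMinNum({true, false}) ? "fold" : "keep select");
      std::printf("nnan+nsz:  %s\n",
                  canFoldSelectToFMinNum({true, true}) ? "fold" : "keep select");
      return 0;
    }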
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 550e89f4a27f9..b96c3ffbd52a8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -23,7 +23,7 @@ define double @test_cross(float %in) {
}
; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; must become fcmp + fcsel, not fmin/fminnm.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
%cmp = fcmp olt float %in, 0.000000e+00
@@ -31,7 +31,8 @@ define double @test_cross_fail_nan(float %in) {
%longer = fpext float %val to double
ret double %longer
-; CHECK: fminnm s
+; CHECK: fcmp
+; CHECK: fcsel
}
; This isn't a min or a max, but passes the first condition for swapping the
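
On the NaN side, the distinction the updated CHECK lines encode is that
AArch64 fminnm implements IEEE minNum, which returns the non-NaN operand,
whereas a select guarded by an ordered compare takes the false edge on a NaN
and can return the NaN itself. A quick illustrative sketch (not the test's
exact operand order):

    #include <cmath>
    #include <cstdio>

    static float select_olt(float a, float b) { return a < b ? a : b; } // fcmp olt + select
    static float minnum(float a, float b) { return std::fmin(a, b); }   // fminnm-like

    int main() {
      float qnan = std::nanf("");
      // olt(2.0, NaN) is false, so the select returns the NaN operand;
      // minNum discards the NaN and returns 2.0.
      std::printf("select: %f\n", select_olt(2.0f, qnan));
      std::printf("minnum: %f\n", minnum(2.0f, qnan));
      return 0;
    }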
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index d7d54a6e48a92..b2fd4821cc6eb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -5,7 +5,8 @@ define double @test_direct(float %in) {
; CHECK-LABEL: test_direct:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fmaxnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s1, s0, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
@@ -18,7 +19,8 @@ define double @test_cross(float %in) {
; CHECK-LABEL: test_cross:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan ult float %in, 0.000000e+00
@@ -33,7 +35,8 @@ define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 92d8676ca04be..938217180676f 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -7,13 +7,14 @@
define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
; CHECK-LABEL: select_select_fold_select_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: fminnm s4, s1, s2
+; CHECK-NEXT: fcmp s0, s3
+; CHECK-NEXT: fcsel s4, s0, s3, gt
; CHECK-NEXT: fcmp s1, s2
-; CHECK-NEXT: fmaxnm s2, s0, s3
-; CHECK-NEXT: fmov s1, #0.50000000
-; CHECK-NEXT: fccmp s4, s0, #4, lt
-; CHECK-NEXT: fadd s1, s0, s1
-; CHECK-NEXT: fcsel s2, s2, s0, gt
+; CHECK-NEXT: fcsel s1, s1, s2, lt
+; CHECK-NEXT: fmov s2, #0.50000000
+; CHECK-NEXT: fccmp s1, s0, #4, lt
+; CHECK-NEXT: fadd s1, s0, s2
+; CHECK-NEXT: fcsel s2, s4, s0, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB0_2
@@ -65,13 +66,13 @@ exit: ; preds = %if.end.i159.i.i, %if.then.i
define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) {
; CHECK-LABEL: select_select_fold_select_or:
; CHECK: // %bb.0:
-; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
-; CHECK-NEXT: fmaxnm s2, s0, s3
-; CHECK-NEXT: fmov s1, #0.50000000
-; CHECK-NEXT: fccmp s4, s0, #0, ge
-; CHECK-NEXT: fadd s1, s0, s1
-; CHECK-NEXT: fcsel s2, s0, s2, gt
+; CHECK-NEXT: fcsel s1, s1, s2, lt
+; CHECK-NEXT: fccmp s0, s3, #0, ge
+; CHECK-NEXT: fmov s2, #0.50000000
+; CHECK-NEXT: fccmp s1, s0, #0, le
+; CHECK-NEXT: fadd s1, s0, s2
+; CHECK-NEXT: fcsel s2, s0, s3, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
index 8438e9d88f5de..6c3ff79f911bc 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -659,9 +659,10 @@ define <vscale x 4 x float> @fcmp_fast_olt_v4f32(<vscale x 4 x float> %z, <vscal
; CHECK-LABEL: fcmp_fast_olt_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z1.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
@@ -675,9 +676,10 @@ define <vscale x 8 x half> @fcmp_fast_olt_v8f16(<vscale x 8 x half> %z, <vscale
; CHECK-LABEL: fcmp_fast_olt_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
-; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
@@ -691,9 +693,10 @@ define <vscale x 4 x float> @fcmp_fast_ogt_v4f32(<vscale x 4 x float> %z, <vscal
; CHECK-LABEL: fcmp_fast_ogt_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
@@ -707,9 +710,10 @@ define <vscale x 8 x half> @fcmp_fast_ogt_v8f16(<vscale x 8 x half> %z, <vscale
; CHECK-LABEL: fcmp_fast_ogt_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
-; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
index 159c9a21cd413..9e8ffb0104340 100644
--- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
@@ -458,12 +458,20 @@ define float @select_fcmp_olt_pos_zero(float %x) {
; CHECK-LABEL: select_fcmp_olt_pos_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.w.x fa5, zero
-; CHECK-NEXT: fmin.s fa0, fa0, fa5
+; CHECK-NEXT: flt.s a0, fa0, fa5
+; CHECK-NEXT: bnez a0, .LBB20_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.s fa0, fa5
+; CHECK-NEXT: .LBB20_2:
; CHECK-NEXT: ret
;
; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero:
; CHECKZFINX: # %bb.0:
-; CHECKZFINX-NEXT: fmin.s a0, a0, zero
+; CHECKZFINX-NEXT: flt.s a1, a0, zero
+; CHECKZFINX-NEXT: bnez a1, .LBB20_2
+; CHECKZFINX-NEXT: # %bb.1:
+; CHECKZFINX-NEXT: li a0, 0
+; CHECKZFINX-NEXT: .LBB20_2:
; CHECKZFINX-NEXT: ret
%cmp = fcmp olt float %x, 0.000000
%sel = select i1 %cmp, float %x, float 0.000000
From 824540d4d83349c26b68ef46e5bcb499f57f267a Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 29 Jun 2024 23:41:38 +0800
Subject: [PATCH 3/3] [SDAG] Fix failing tests.
---
llvm/test/CodeGen/AArch64/select_fmf.ll | 9 +-
llvm/test/CodeGen/AMDGPU/fmed3.ll | 17 +-
llvm/test/CodeGen/AMDGPU/reduction.ll | 641 +++++++-----
llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll | 72 +-
.../test/CodeGen/ARM/fp16-vminmaxnm-vector.ll | 129 ++-
llvm/test/CodeGen/ARM/fp16-vminmaxnm.ll | 957 +++++++++++++++---
.../CodeGen/ARM/minnum-maxnum-intrinsics.ll | 7 +-
llvm/test/CodeGen/ARM/vminmaxnm-safe.ll | 708 ++++++++++---
llvm/test/CodeGen/ARM/vminmaxnm.ll | 703 ++++++++++---
llvm/test/CodeGen/PowerPC/vec-min-max.ll | 24 +-
.../CodeGen/PowerPC/vector-reduce-fmax.ll | 552 ++++++----
.../CodeGen/PowerPC/vector-reduce-fmin.ll | 552 ++++++----
llvm/test/CodeGen/SystemZ/vec-max-05.ll | 162 +--
.../CodeGen/SystemZ/vec-max-min-zerosplat.ll | 69 +-
llvm/test/CodeGen/SystemZ/vec-min-05.ll | 165 +--
llvm/test/CodeGen/Thumb2/mve-minmax.ll | 82 +-
llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll | 24 +-
.../test/CodeGen/Thumb2/mve-pred-selectop2.ll | 32 +-
.../test/CodeGen/Thumb2/mve-pred-selectop3.ll | 16 +-
.../CodeGen/Thumb2/mve-vecreduce-fminmax.ll | 120 ++-
20 files changed, 3659 insertions(+), 1382 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 938217180676f..c191c76b31dc2 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -7,14 +7,13 @@
define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
; CHECK-LABEL: select_select_fold_select_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcmp s0, s3
-; CHECK-NEXT: fcsel s4, s0, s3, gt
; CHECK-NEXT: fcmp s1, s2
+; CHECK-NEXT: fmov s4, #0.50000000
; CHECK-NEXT: fcsel s1, s1, s2, lt
-; CHECK-NEXT: fmov s2, #0.50000000
+; CHECK-NEXT: fmaxnm s2, s0, s3
; CHECK-NEXT: fccmp s1, s0, #4, lt
-; CHECK-NEXT: fadd s1, s0, s2
-; CHECK-NEXT: fcsel s2, s4, s0, gt
+; CHECK-NEXT: fadd s1, s0, s4
+; CHECK-NEXT: fcsel s2, s2, s0, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB0_2
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 764fb992d4d34..c6019ce9fbce0 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -1016,7 +1016,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
-; VI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 2.0, v2, vcc
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 4.0, v2, vcc
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
@@ -1051,7 +1054,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
+; GFX9-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc
+; GFX9-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v1
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT: s_endpgm
;
@@ -1078,8 +1084,11 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 2.0, v1
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc_lo
+; GFX11-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 4.0, v1
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll
index 53a036b617725..1d47881a91bda 100644
--- a/llvm/test/CodeGen/AMDGPU/reduction.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduction.ll
@@ -1,14 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
-; GCN-LABEL: {{^}}reduction_fadd_v4f16:
-; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_add_f16_sdwa
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
define half @reduction_fadd_v4f16(<4 x half> %vec4) {
+; GFX9-LABEL: reduction_fadd_v4f16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_f16 v0, v0, v1
+; GFX9-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_fadd_v4f16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_add_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-NEXT: v_add_f16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = fadd <4 x half> %vec4, %rdx.shuf
@@ -18,17 +26,21 @@ entry:
ret half %res
}
-; GCN-LABEL: {{^}}reduction_fsub_v4f16:
-; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_add_f16 [[ADD:v[0-9]+]], v0, v1 neg_lo:[0,1] neg_hi:[0,1]{{$}}
-; GFX9-NEXT: v_sub_f16_sdwa v0, [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: s_setpc_b64
-
-; VI: v_sub_f16_sdwa
-; VI-NEXT: v_sub_f16_e32
-; VI-NEXT: v_sub_f16_e32
-; VI-NEXT: s_setpc_b64
define half @reduction_fsub_v4f16(<4 x half> %vec4) {
+; GFX9-LABEL: reduction_fsub_v4f16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
+; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_fsub_v4f16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_sub_f16_e32 v0, v0, v1
+; VI-NEXT: v_sub_f16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = fsub <4 x half> %vec4, %rdx.shuf
@@ -37,20 +49,22 @@ entry:
%res = extractelement <4 x half> %bin.rdx2, i32 0
ret half %res
}
-
; Make sure nsz is preserved when the operations are split.
-; GCN-LABEL: {{^}}reduction_fsub_v4f16_preserve_fmf:
-; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]{{$}}
-; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT: s_setpc_b64
-
-; VI: s_waitcnt
-; VI-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_sub_f16_e32 v0, v1, v0
-; VI-NEXT: v_add_f16_e32 v0, v2, v0
-; VI-NEXT: s_setpc_b64
define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) {
+; GFX9-LABEL: reduction_fsub_v4f16_preserve_fmf:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
+; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_fsub_v4f16_preserve_fmf:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_sub_f16_e32 v0, v1, v0
+; VI-NEXT: v_add_f16_e32 v0, v2, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = fsub nsz <4 x half> %vec4, %rdx.shuf
@@ -61,14 +75,21 @@ entry:
ret half %neg.res
}
-; GCN-LABEL: {{^}}reduction_fmul_half4:
-; GFX9: v_pk_mul_f16 [[MUL:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_mul_f16_sdwa v{{[0-9]+}}, [[MUL]], [[MUL]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_mul_f16_sdwa
-; VI-NEXT: v_mul_f16_e32
-; VI-NEXT: v_mul_f16_e32
define half @reduction_fmul_half4(<4 x half> %vec4) {
+; GFX9-LABEL: reduction_fmul_half4:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: v_mul_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_fmul_half4:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mul_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_mul_f16_e32 v0, v0, v1
+; VI-NEXT: v_mul_f16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = fmul <4 x half> %vec4, %rdx.shuf
@@ -78,14 +99,21 @@ entry:
ret half %res
}
-; GCN-LABEL: {{^}}reduction_v4i16:
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_add_u16_sdwa v{{[0-9]+}}, [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_add_u16_sdwa
-; VI-NEXT: v_add_u16_e32
-; VI-NEXT: v_add_u16_e32
define i16 @reduction_v4i16(<4 x i16> %vec4) {
+; GFX9-LABEL: reduction_v4i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-NEXT: v_add_u16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_v4i16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_add_u16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_u16_e32 v0, v0, v1
+; VI-NEXT: v_add_u16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = add <4 x i16> %vec4, %rdx.shuf
@@ -95,21 +123,27 @@ entry:
ret i16 %res
}
-; GCN-LABEL: {{^}}reduction_half8:
-; GFX9: v_pk_add_f16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_f16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_f16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
-; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_add_f16_sdwa
-; VI-NEXT: v_add_f16_sdwa
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-
define half @reduction_half8(<8 x half> %vec8) {
+; GFX9-LABEL: reduction_half8:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_f16 v1, v1, v3
+; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX9-NEXT: v_pk_add_f16 v0, v0, v1
+; GFX9-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_half8:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_f16_e32 v1, v1, v3
+; VI-NEXT: v_add_f16_e32 v0, v0, v2
+; VI-NEXT: v_add_f16_e32 v2, v5, v4
+; VI-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-NEXT: v_add_f16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = fadd <8 x half> %vec8, %rdx.shuf
@@ -121,21 +155,27 @@ entry:
ret half %res
}
-; GCN-LABEL: {{^}}reduction_v8i16:
-; GFX9: v_pk_add_u16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_u16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_u16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
-; GFX9-NEXT: v_add_u16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_add_u16_sdwa
-; VI-NEXT: v_add_u16_sdwa
-; VI-NEXT: v_add_u16_e32
-; VI-NEXT: v_add_u16_e32
-; VI-NEXT: v_add_u16_e32
-; VI-NEXT: v_add_u16_e32
-; VI-NEXT: v_add_u16_e32
-
define i16 @reduction_v8i16(<8 x i16> %vec8) {
+; GFX9-LABEL: reduction_v8i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v1, v1, v3
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v2
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-NEXT: v_add_u16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_v8i16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_add_u16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_u16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_u16_e32 v1, v1, v3
+; VI-NEXT: v_add_u16_e32 v0, v0, v2
+; VI-NEXT: v_add_u16_e32 v2, v5, v4
+; VI-NEXT: v_add_u16_e32 v0, v0, v1
+; VI-NEXT: v_add_u16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = add <8 x i16> %vec8, %rdx.shuf
@@ -147,33 +187,39 @@ entry:
ret i16 %res
}
-; GCN-LABEL: {{^}}reduction_half16:
-; GFX9: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_f16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_f16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_add_f16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
-; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_add_f16_sdwa
-; VI-NEXT: v_add_f16_sdwa
-; VI-NEXT: v_add_f16_sdwa
-; VI-NEXT: v_add_f16_sdwa
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-; VI-NEXT: v_add_f16_e32
-
define half @reduction_half16(<16 x half> %vec16) {
+; GFX9-LABEL: reduction_half16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_f16 v2, v2, v6
+; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX9-NEXT: v_pk_add_f16 v3, v3, v7
+; GFX9-NEXT: v_pk_add_f16 v1, v1, v5
+; GFX9-NEXT: v_pk_add_f16 v1, v1, v3
+; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX9-NEXT: v_pk_add_f16 v0, v0, v1
+; GFX9-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_half16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_add_f16_sdwa v8, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_f16_sdwa v9, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_f16_sdwa v10, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_f16_sdwa v11, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_add_f16_e32 v2, v2, v6
+; VI-NEXT: v_add_f16_e32 v0, v0, v4
+; VI-NEXT: v_add_f16_e32 v3, v3, v7
+; VI-NEXT: v_add_f16_e32 v1, v1, v5
+; VI-NEXT: v_add_f16_e32 v4, v11, v10
+; VI-NEXT: v_add_f16_e32 v5, v9, v8
+; VI-NEXT: v_add_f16_e32 v1, v1, v3
+; VI-NEXT: v_add_f16_e32 v0, v0, v2
+; VI-NEXT: v_add_f16_e32 v2, v5, v4
+; VI-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-NEXT: v_add_f16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = fadd <16 x half> %vec16, %rdx.shuf
@@ -187,14 +233,21 @@ entry:
ret half %res
}
-; GCN-LABEL: {{^}}reduction_min_v4i16:
-; GFX9: v_pk_min_u16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_min_u16_sdwa
-; VI-NEXT: v_min_u16_e32
-; VI-NEXT: v_min_u16_e32
define i16 @reduction_min_v4i16(<4 x i16> %vec4) {
+; GFX9-LABEL: reduction_min_v4i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_min_u16 v0, v0, v1
+; GFX9-NEXT: v_min_u16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_min_v4i16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_min_u16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_min_u16_e32 v0, v0, v1
+; VI-NEXT: v_min_u16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.minmax.cmp = icmp ult <4 x i16> %vec4, %rdx.shuf
@@ -206,20 +259,27 @@ entry:
ret i16 %res
}
-; GCN-LABEL: {{^}}reduction_umin_v8i16:
-; GFX9: v_pk_min_u16 [[MIN1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_min_u16 [[MIN2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_min_u16 [[MIN3:v[0-9]+]], [[MIN2]], [[MIN1]]{{$}}
-; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, [[MIN3]], [[MIN3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_min_u16_sdwa
-; VI-NEXT: v_min_u16_sdwa
-; VI-NEXT: v_min_u16_e32
-; VI-NEXT: v_min_u16_e32
-; VI-NEXT: v_min_u16_e32
-; VI-NEXT: v_min_u16_e32
-; VI-NEXT: v_min_u16_e32
define i16 @reduction_umin_v8i16(<8 x i16> %vec8) {
+; GFX9-LABEL: reduction_umin_v8i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_min_u16 v1, v1, v3
+; GFX9-NEXT: v_pk_min_u16 v0, v0, v2
+; GFX9-NEXT: v_pk_min_u16 v0, v0, v1
+; GFX9-NEXT: v_min_u16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_umin_v8i16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_min_u16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_min_u16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_min_u16_e32 v1, v1, v3
+; VI-NEXT: v_min_u16_e32 v0, v0, v2
+; VI-NEXT: v_min_u16_e32 v2, v5, v4
+; VI-NEXT: v_min_u16_e32 v0, v0, v1
+; VI-NEXT: v_min_u16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.minmax.cmp = icmp ult <8 x i16> %vec8, %rdx.shuf
@@ -233,17 +293,31 @@ entry:
%res = extractelement <8 x i16> %rdx.minmax.select6, i32 0
ret i16 %res
}
-
; Tests to make sure without slp the number of instructions are more.
-; GCN-LABEL: {{^}}reduction_umin_v8i16_woslp:
-; GFX9: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min3_u16
-; GFX9-NEXT: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min3_u16
-; GFX9-NEXT: v_min3_u16
define i16 @reduction_umin_v8i16_woslp(<8 x i16> %vec8) {
+; GFX9-LABEL: reduction_umin_v8i16_woslp:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v1
+; GFX9-NEXT: v_min_u16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX9-NEXT: v_min3_u16 v0, v4, v1, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX9-NEXT: v_min3_u16 v0, v5, v2, v0
+; GFX9-NEXT: v_min3_u16 v0, v6, v3, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_umin_v8i16_woslp:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_min_u16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_u16_e32 v0, v1, v0
+; VI-NEXT: v_min_u16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_u16_e32 v0, v2, v0
+; VI-NEXT: v_min_u16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_u16_e32 v0, v3, v0
+; VI-NEXT: v_min_u16_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%elt0 = extractelement <8 x i16> %vec8, i64 0
%elt1 = extractelement <8 x i16> %vec8, i64 1
@@ -253,53 +327,56 @@ entry:
%elt5 = extractelement <8 x i16> %vec8, i64 5
%elt6 = extractelement <8 x i16> %vec8, i64 6
%elt7 = extractelement <8 x i16> %vec8, i64 7
-
%cmp0 = icmp ult i16 %elt1, %elt0
%min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
%cmp1 = icmp ult i16 %elt2, %min1
%min2 = select i1 %cmp1, i16 %elt2, i16 %min1
%cmp2 = icmp ult i16 %elt3, %min2
%min3 = select i1 %cmp2, i16 %elt3, i16 %min2
-
%cmp3 = icmp ult i16 %elt4, %min3
%min4 = select i1 %cmp3, i16 %elt4, i16 %min3
%cmp4 = icmp ult i16 %elt5, %min4
%min5 = select i1 %cmp4, i16 %elt5, i16 %min4
-
%cmp5 = icmp ult i16 %elt6, %min5
%min6 = select i1 %cmp5, i16 %elt6, i16 %min5
%cmp6 = icmp ult i16 %elt7, %min6
%min7 = select i1 %cmp6, i16 %elt7, i16 %min6
-
ret i16 %min7
}
-; GCN-LABEL: {{^}}reduction_smin_v16i16:
-; GFX9: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_min_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_min_i16_sdwa
-; VI-NEXT: v_min_i16_sdwa
-; VI-NEXT: v_min_i16_sdwa
-; VI-NEXT: v_min_i16_sdwa
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
-; VI-NEXT: v_min_i16_e32
define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
+; GFX9-LABEL: reduction_smin_v16i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_min_i16 v2, v2, v6
+; GFX9-NEXT: v_pk_min_i16 v0, v0, v4
+; GFX9-NEXT: v_pk_min_i16 v3, v3, v7
+; GFX9-NEXT: v_pk_min_i16 v1, v1, v5
+; GFX9-NEXT: v_pk_min_i16 v1, v1, v3
+; GFX9-NEXT: v_pk_min_i16 v0, v0, v2
+; GFX9-NEXT: v_pk_min_i16 v0, v0, v1
+; GFX9-NEXT: v_min_i16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_smin_v16i16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_min_i16_sdwa v8, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_min_i16_sdwa v9, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_min_i16_sdwa v10, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_min_i16_sdwa v11, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_min_i16_e32 v2, v2, v6
+; VI-NEXT: v_min_i16_e32 v0, v0, v4
+; VI-NEXT: v_min_i16_e32 v3, v3, v7
+; VI-NEXT: v_min_i16_e32 v1, v1, v5
+; VI-NEXT: v_min_i16_e32 v4, v11, v10
+; VI-NEXT: v_min_i16_e32 v5, v9, v8
+; VI-NEXT: v_min_i16_e32 v1, v1, v3
+; VI-NEXT: v_min_i16_e32 v0, v0, v2
+; VI-NEXT: v_min_i16_e32 v2, v5, v4
+; VI-NEXT: v_min_i16_e32 v0, v0, v1
+; VI-NEXT: v_min_i16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.minmax.cmp = icmp slt <16 x i16> %vec16, %rdx.shuf
@@ -316,25 +393,47 @@ entry:
%res = extractelement <16 x i16> %rdx.minmax.select9, i32 0
ret i16 %res
}
-
; Tests to make sure without slp the number of instructions are more.
-; GCN-LABEL: {{^}}reduction_smin_v16i16_woslp:
-; GFX9: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min3_i16
-; GFX9-NEXT: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min3_i16
-; GFX9-NEXT: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min3_i16
-; GFX9-NEXT: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min3_i16
-; GFX9-NEXT: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min3_i16
-; GFX9-NEXT: v_lshrrev_b32_e32
-; GFX9-NEXT: v_min3_i16
-; GFX9-NEXT: v_min3_i16
define i16 @reduction_smin_v16i16_woslp(<16 x i16> %vec16) {
+; GFX9-LABEL: reduction_smin_v16i16_woslp:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v1
+; GFX9-NEXT: v_min_i16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v2
+; GFX9-NEXT: v_min3_i16 v0, v8, v1, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v3
+; GFX9-NEXT: v_min3_i16 v0, v9, v2, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v11, 16, v4
+; GFX9-NEXT: v_min3_i16 v0, v10, v3, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v5
+; GFX9-NEXT: v_min3_i16 v0, v11, v4, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v6
+; GFX9-NEXT: v_min3_i16 v0, v12, v5, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v7
+; GFX9-NEXT: v_min3_i16 v0, v13, v6, v0
+; GFX9-NEXT: v_min3_i16 v0, v14, v7, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_smin_v16i16_woslp:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_min_i16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_i16_e32 v0, v1, v0
+; VI-NEXT: v_min_i16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_i16_e32 v0, v2, v0
+; VI-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_i16_e32 v0, v3, v0
+; VI-NEXT: v_min_i16_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_i16_e32 v0, v4, v0
+; VI-NEXT: v_min_i16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_i16_e32 v0, v5, v0
+; VI-NEXT: v_min_i16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_i16_e32 v0, v6, v0
+; VI-NEXT: v_min_i16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_min_i16_e32 v0, v7, v0
+; VI-NEXT: v_min_i16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%elt0 = extractelement <16 x i16> %vec16, i64 0
%elt1 = extractelement <16 x i16> %vec16, i64 1
@@ -344,7 +443,6 @@ entry:
%elt5 = extractelement <16 x i16> %vec16, i64 5
%elt6 = extractelement <16 x i16> %vec16, i64 6
%elt7 = extractelement <16 x i16> %vec16, i64 7
-
%elt8 = extractelement <16 x i16> %vec16, i64 8
%elt9 = extractelement <16 x i16> %vec16, i64 9
%elt10 = extractelement <16 x i16> %vec16, i64 10
@@ -353,56 +451,55 @@ entry:
%elt13 = extractelement <16 x i16> %vec16, i64 13
%elt14 = extractelement <16 x i16> %vec16, i64 14
%elt15 = extractelement <16 x i16> %vec16, i64 15
-
%cmp0 = icmp slt i16 %elt1, %elt0
%min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
%cmp1 = icmp slt i16 %elt2, %min1
%min2 = select i1 %cmp1, i16 %elt2, i16 %min1
%cmp2 = icmp slt i16 %elt3, %min2
%min3 = select i1 %cmp2, i16 %elt3, i16 %min2
-
%cmp3 = icmp slt i16 %elt4, %min3
%min4 = select i1 %cmp3, i16 %elt4, i16 %min3
%cmp4 = icmp slt i16 %elt5, %min4
%min5 = select i1 %cmp4, i16 %elt5, i16 %min4
-
%cmp5 = icmp slt i16 %elt6, %min5
%min6 = select i1 %cmp5, i16 %elt6, i16 %min5
%cmp6 = icmp slt i16 %elt7, %min6
%min7 = select i1 %cmp6, i16 %elt7, i16 %min6
-
%cmp7 = icmp slt i16 %elt8, %min7
%min8 = select i1 %cmp7, i16 %elt8, i16 %min7
%cmp8 = icmp slt i16 %elt9, %min8
%min9 = select i1 %cmp8, i16 %elt9, i16 %min8
-
%cmp9 = icmp slt i16 %elt10, %min9
%min10 = select i1 %cmp9, i16 %elt10, i16 %min9
%cmp10 = icmp slt i16 %elt11, %min10
%min11 = select i1 %cmp10, i16 %elt11, i16 %min10
-
%cmp11 = icmp slt i16 %elt12, %min11
%min12 = select i1 %cmp11, i16 %elt12, i16 %min11
%cmp12 = icmp slt i16 %elt13, %min12
%min13 = select i1 %cmp12, i16 %elt13, i16 %min12
-
%cmp13 = icmp slt i16 %elt14, %min13
%min14 = select i1 %cmp13, i16 %elt14, i16 %min13
%cmp14 = icmp slt i16 %elt15, %min14
%min15 = select i1 %cmp14, i16 %elt15, i16 %min14
-
ret i16 %min15
}
-; GCN-LABEL: {{^}}reduction_umax_v4i16:
-; GFX9: v_pk_max_u16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_max_u16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_max_u16_sdwa
-; VI-NEXT: v_max_u16_e32
-; VI-NEXT: v_max_u16_e32
define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
+; GFX9-LABEL: reduction_umax_v4i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_max_u16 v0, v0, v1
+; GFX9-NEXT: v_max_u16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_umax_v4i16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_max_u16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_max_u16_e32 v0, v0, v1
+; VI-NEXT: v_max_u16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.minmax.cmp = icmp ugt <4 x i16> %vec4, %rdx.shuf
@@ -414,14 +511,21 @@ entry:
ret i16 %res
}
-; GCN-LABEL: {{^}}reduction_smax_v4i16:
-; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX9-NEXT: v_max_i16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI: v_max_i16_sdwa
-; VI-NEXT: v_max_i16_e32
-; VI-NEXT: v_max_i16_e32
define i16 @reduction_smax_v4i16(<4 x i16> %vec4) #0 {
+; GFX9-LABEL: reduction_smax_v4i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_max_i16 v0, v0, v1
+; GFX9-NEXT: v_max_i16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_smax_v4i16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_max_i16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_max_i16_e32 v0, v0, v1
+; VI-NEXT: v_max_i16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.minmax.cmp = icmp sgt <4 x i16> %vec4, %rdx.shuf
@@ -433,23 +537,27 @@ entry:
ret i16 %res
}
-; GCN-LABEL: {{^}}reduction_maxnum_v4f16:
-; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
-; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
-; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
-; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-
-; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
-; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1
-
-; VI-DAG: v_max_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
-; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
-; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_maxnum_v4f16(<4 x half> %vec4) {
+; GFX9-LABEL: reduction_maxnum_v4f16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX9-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_maxnum_v4f16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_max_f16_e32 v1, v1, v1
+; VI-NEXT: v_max_f16_e32 v0, v0, v0
+; VI-NEXT: v_max_f16_e32 v2, v3, v2
+; VI-NEXT: v_max_f16_e32 v0, v0, v1
+; VI-NEXT: v_max_f16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.minmax = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf)
@@ -459,22 +567,27 @@ entry:
ret half %res
}
-; GCN-LABEL: {{^}}reduction_minnum_v4f16:
-; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
-; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
-; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
-; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
-; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1
-
-; VI-DAG: v_min_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
-; VI-DAG: v_min_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
-; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_minnum_v4f16(<4 x half> %vec4) {
+; GFX9-LABEL: reduction_minnum_v4f16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX9-NEXT: v_min_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: reduction_minnum_v4f16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT: v_max_f16_e32 v1, v1, v1
+; VI-NEXT: v_max_f16_e32 v0, v0, v0
+; VI-NEXT: v_min_f16_e32 v2, v3, v2
+; VI-NEXT: v_min_f16_e32 v0, v0, v1
+; VI-NEXT: v_min_f16_e32 v0, v0, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.minmax = call <4 x half> @llvm.minnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf)
@@ -486,32 +599,19 @@ entry:
; FIXME: Need to preserve fast math flags when fmaxnum matched
; directly from the IR to avoid unnecessary quieting.
-
-; GCN-LABEL: {{^}}reduction_fast_max_pattern_v4f16:
-; XGFX9: v_pk_max_f16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; XGFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; XVI: s_waitcnt
-; XVI-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; XVI-NEXT: v_max_f16_e32 v0, v0, v1
-; XVI-NEXT: v_max_f16_e32 v0, v0, v2
-; XVI-NEXT: s_setpc_b64
-
-; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
-; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
-; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
-; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
-; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1
-
-; VI-DAG: v_max_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
-; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
-; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
+; GCN-LABEL: reduction_fast_max_pattern_v4f16:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GCN-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
+; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GCN-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_cmp_gt_f16_e32 vcc, v0, v2
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf
@@ -526,31 +626,19 @@ entry:
; FIXME: Need to preserve fast math flags when fmaxnum matched
; directly from the IR to avoid unnecessary quieting.
-; GCN-LABEL: {{^}}reduction_fast_min_pattern_v4f16:
-; XGFX9: v_pk_min_f16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; XGFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; XVI: s_waitcnt
-; XVI-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; XVI-NEXT: v_min_f16_e32 v0, v0, v1
-; XVI-NEXT: v_min_f16_e32 v0, v0, v2
-; XVI-NEXT: s_setpc_b64
-
-; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
-; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
-; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
-; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-
-; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
-; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1
-
-; VI-DAG: v_min_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
-; VI-DAG: v_min_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
-; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
+; GCN-LABEL: reduction_fast_min_pattern_v4f16:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GCN-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
+; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GCN-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_cmp_lt_f16_e32 vcc, v0, v2
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf
@@ -561,6 +649,3 @@ entry:
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0
ret half %res
}
-
-declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
-declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>)
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 56e734c440433..a95e1717869e3 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -201,7 +201,9 @@ define half @fp16_vminnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -229,7 +231,9 @@ define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -251,7 +255,9 @@ define half @fp16_vminnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -275,7 +281,9 @@ define half @fp16_vminnm_NNNule(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -306,7 +314,9 @@ define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -330,7 +340,9 @@ define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -354,7 +366,9 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -384,7 +398,9 @@ define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -410,7 +426,9 @@ define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -432,7 +450,9 @@ define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -456,7 +476,9 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -480,12 +502,14 @@ entry:
define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
-; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI23_0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -508,7 +532,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -526,12 +552,14 @@ entry:
define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI25_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
-; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI25_0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
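; Reviewer note (a sketch, not part of the patch): the safe-mode diffs
; above show why the SDAGBuilder fold had to go. Two illustrative IR
; fragments, using a hypothetical input %x:
;
;   %c = fcmp olt half %x, 0xH0000            ; compare against +0.0
;   %r = select i1 %c, half %x, half 0xH0000
;
; For %x = -0.0, "olt" is false (IEEE compares -0.0 == +0.0), so %r is
; +0.0, while vminnm.f16 returns -0.0 (minNum orders -0.0 below +0.0).
;
;   %c2 = fcmp ult half %x, %y
;   %r2 = select i1 %c2, half %x, half %y
;
; For %x = NaN, "ult" is true, so %r2 is NaN, while vminnm.f16 returns
; the non-NaN operand %y. Hence the vcmp/vmrs/vsel sequences above in
; place of a single vminnm.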
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
index 6a61bb594b430..a7169bbe0b455 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7a -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard | FileCheck %s
@@ -7,7 +8,9 @@
define <4 x half> @test1(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test1:
-; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 d16, d0, d1
+; CHECK-NEXT: vbif d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
@@ -16,7 +19,9 @@ define <4 x half> @test1(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test2(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test2:
-; CHECK: vminnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 d16, d0, d1
+; CHECK-NEXT: vbit d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
@@ -25,7 +30,9 @@ define <4 x half> @test2(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test3(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test3:
-; CHECK: vminnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 d16, d0, d1
+; CHECK-NEXT: vbit d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
@@ -34,7 +41,9 @@ define <4 x half> @test3(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test4(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test4:
-; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 d16, d0, d1
+; CHECK-NEXT: vbif d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
@@ -43,7 +52,9 @@ define <4 x half> @test4(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test5(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test5:
-; CHECK: vminnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 d16, d1, d0
+; CHECK-NEXT: vbif d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
@@ -52,7 +63,9 @@ define <4 x half> @test5(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test6(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test6:
-; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 d16, d1, d0
+; CHECK-NEXT: vbit d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
@@ -61,7 +74,9 @@ define <4 x half> @test6(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test7(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test7:
-; CHECK: vminnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 d16, d1, d0
+; CHECK-NEXT: vbif d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
@@ -70,7 +85,9 @@ define <4 x half> @test7(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test8(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test8:
-; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 d16, d1, d0
+; CHECK-NEXT: vbit d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
@@ -81,7 +98,9 @@ define <4 x half> @test8(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test11(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test11:
-; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 d16, d0, d1
+; CHECK-NEXT: vbif d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
@@ -90,7 +109,9 @@ define <4 x half> @test11(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test12(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test12:
-; CHECK: vminnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 d16, d0, d1
+; CHECK-NEXT: vbit d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
@@ -99,7 +120,9 @@ define <4 x half> @test12(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test13(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test13:
-; CHECK: vminnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 d16, d0, d1
+; CHECK-NEXT: vbit d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
@@ -108,7 +131,9 @@ define <4 x half> @test13(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test14(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test14:
-; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 d16, d0, d1
+; CHECK-NEXT: vbif d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
@@ -117,7 +142,9 @@ define <4 x half> @test14(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test15(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test15:
-; CHECK: vminnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 d16, d1, d0
+; CHECK-NEXT: vbif d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
@@ -126,7 +153,9 @@ define <4 x half> @test15(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test16(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test16:
-; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 d16, d1, d0
+; CHECK-NEXT: vbit d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
@@ -135,7 +164,9 @@ define <4 x half> @test16(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test17(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test17:
-; CHECK: vminnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 d16, d1, d0
+; CHECK-NEXT: vbif d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
@@ -144,7 +175,9 @@ define <4 x half> @test17(<4 x half> %A, <4 x half> %B) {
define <4 x half> @test18(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: test18:
-; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 d16, d1, d0
+; CHECK-NEXT: vbit d0, d1, d16
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <4 x half> %A, %B
%tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
@@ -157,7 +190,9 @@ define <4 x half> @test18(<4 x half> %A, <4 x half> %B) {
define <8 x half> @test201(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test201:
-; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 q8, q0, q1
+; CHECK-NEXT: vbif q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
@@ -166,7 +201,9 @@ define <8 x half> @test201(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test202(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test202:
-; CHECK: vminnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 q8, q0, q1
+; CHECK-NEXT: vbit q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
@@ -175,7 +212,9 @@ define <8 x half> @test202(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test203(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test203:
-; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 q8, q0, q1
+; CHECK-NEXT: vbif q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
@@ -184,7 +223,9 @@ define <8 x half> @test203(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test204(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test204:
-; CHECK: vminnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 q8, q0, q1
+; CHECK-NEXT: vbit q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
@@ -193,7 +234,9 @@ define <8 x half> @test204(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test205(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test205:
-; CHECK: vminnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 q8, q1, q0
+; CHECK-NEXT: vbif q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
@@ -202,7 +245,9 @@ define <8 x half> @test205(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test206(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test206:
-; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 q8, q1, q0
+; CHECK-NEXT: vbit q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
@@ -211,7 +256,9 @@ define <8 x half> @test206(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test207(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test207:
-; CHECK: vminnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 q8, q1, q0
+; CHECK-NEXT: vbif q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
@@ -220,7 +267,9 @@ define <8 x half> @test207(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test208(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test208:
-; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 q8, q1, q0
+; CHECK-NEXT: vbit q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
@@ -231,7 +280,9 @@ define <8 x half> @test208(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test209(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test209:
-; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 q8, q0, q1
+; CHECK-NEXT: vbif q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
@@ -240,7 +291,9 @@ define <8 x half> @test209(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test210(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test210:
-; CHECK: vminnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 q8, q0, q1
+; CHECK-NEXT: vbit q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
@@ -249,7 +302,9 @@ define <8 x half> @test210(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test211(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test211:
-; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 q8, q0, q1
+; CHECK-NEXT: vbif q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
@@ -258,7 +313,9 @@ define <8 x half> @test211(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test214(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test214:
-; CHECK: vminnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 q8, q0, q1
+; CHECK-NEXT: vbit q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
@@ -267,7 +324,9 @@ define <8 x half> @test214(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test215(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test215:
-; CHECK: vminnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 q8, q1, q0
+; CHECK-NEXT: vbif q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
@@ -276,7 +335,9 @@ define <8 x half> @test215(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test216(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test216:
-; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcgt.f16 q8, q1, q0
+; CHECK-NEXT: vbit q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
@@ -285,7 +346,9 @@ define <8 x half> @test216(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test217(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test217:
-; CHECK: vminnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 q8, q1, q0
+; CHECK-NEXT: vbif q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
@@ -294,7 +357,9 @@ define <8 x half> @test217(<8 x half> %A, <8 x half> %B) {
define <8 x half> @test218(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: test218:
-; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcge.f16 q8, q1, q0
+; CHECK-NEXT: vbit q0, q1, q8
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <8 x half> %A, %B
%tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
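; Reviewer note (a sketch, not part of the patch): with the builder
; fold removed, the fast-math vector selects above go through generic
; VSELECT lowering instead. For example
;
;   %m = fcmp fast ogt <4 x half> %A, %B
;   %r = select <4 x i1> %m, <4 x half> %A, <4 x half> %B
;
; becomes vcgt.f16 to materialize a per-lane mask, followed by a NEON
; bitwise select: vbif keeps the destination lanes where the mask is
; set, vbit inserts the other operand there. No vminnm/vmaxnm is
; formed for these patterns any more.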
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm.ll
index 525c27be4f977..0cfd5c502d8f5 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=arm-eabi -mattr=+fullfp16 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s
-; RUN: llc < %s -mtriple thumbv7a -mattr=+fullfp16 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=arm-eabi -mattr=+fullfp16 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=ARMEABI
+; RUN: llc < %s -mtriple thumbv7a -mattr=+fullfp16 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=THUMBV7A
; TODO: we can't pass half-precision arguments as "half" types yet. We do
; that for the time being by passing "float %f.coerce" and the necessary
@@ -8,10 +9,25 @@
; want to use that here.
define half @fp16_vminnm_o(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vminnm_o:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminnm_o:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r1
+; ARMEABI-NEXT: vmov.f16 s2, r0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vminnm_o:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r1
+; THUMBV7A-NEXT: vmov.f16 s2, r0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -21,10 +37,25 @@ entry:
}
define half @fp16_vminnm_o_rev(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vminnm_o_rev:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminnm_o_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, r1
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vminnm_o_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, r1
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -34,10 +65,25 @@ entry:
}
define half @fp16_vminnm_u(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vminnm_u:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminnm_u:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r1
+; ARMEABI-NEXT: vmov.f16 s2, r0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vminnm_u:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r1
+; THUMBV7A-NEXT: vmov.f16 s2, r0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -47,10 +93,25 @@ entry:
}
define half @fp16_vminnm_ule(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vminnm_ule:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminnm_ule:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r1
+; ARMEABI-NEXT: vmov.f16 s2, r0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vminnm_ule:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r1
+; THUMBV7A-NEXT: vmov.f16 s2, r0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -60,10 +121,25 @@ entry:
}
define half @fp16_vminnm_u_rev(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vminnm_u_rev:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminnm_u_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, r1
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vminnm_u_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, r1
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -73,10 +149,25 @@ entry:
}
define half @fp16_vmaxnm_o(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_o:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vmaxnm_o:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r1
+; ARMEABI-NEXT: vmov.f16 s2, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_o:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r1
+; THUMBV7A-NEXT: vmov.f16 s2, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -86,10 +177,25 @@ entry:
}
define half @fp16_vmaxnm_oge(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_oge:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vmaxnm_oge:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r1
+; ARMEABI-NEXT: vmov.f16 s2, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_oge:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r1
+; THUMBV7A-NEXT: vmov.f16 s2, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -99,10 +205,25 @@ entry:
}
define half @fp16_vmaxnm_o_rev(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_o_rev:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vmaxnm_o_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, r1
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_o_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, r1
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -112,10 +233,25 @@ entry:
}
define half @fp16_vmaxnm_ole_rev(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_ole_rev:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vmaxnm_ole_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, r1
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_ole_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, r1
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -125,10 +261,25 @@ entry:
}
define half @fp16_vmaxnm_u(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_u:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vmaxnm_u:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r1
+; ARMEABI-NEXT: vmov.f16 s2, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_u:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r1
+; THUMBV7A-NEXT: vmov.f16 s2, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -138,10 +289,25 @@ entry:
}
define half @fp16_vmaxnm_uge(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_uge:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vmaxnm_uge:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r1
+; ARMEABI-NEXT: vmov.f16 s2, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_uge:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r1
+; THUMBV7A-NEXT: vmov.f16 s2, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -151,10 +317,25 @@ entry:
}
define half @fp16_vmaxnm_u_rev(i16 signext %a, i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_u_rev:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vmaxnm_u_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, r1
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_u_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, r1
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
@@ -166,12 +347,41 @@ entry:
; known non-NaNs
define half @fp16_vminnm_NNNo(i16 signext %a) {
-; CHECK-LABEL: fp16_vminnm_NNNo:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vminnm_NNNo:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, #1.200000e+01
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vldr.16 s2, .LCPI12_0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI12_0:
+; ARMEABI-NEXT: .short 0x5040 @ half 34
+;
+; THUMBV7A-LABEL: fp16_vminnm_NNNo:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, #1.200000e+01
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI12_0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI12_0:
+; THUMBV7A-NEXT: .short 0x5040 @ half 34
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast olt half %0, 12.
@@ -182,12 +392,45 @@ entry:
}
define half @fp16_vminnm_NNNo_rev(i16 signext %a) {
-; CHECK-LABEL: fp16_vminnm_NNNo_rev:
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vminnm_NNNo_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI13_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vldr.16 s2, .LCPI13_1
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI13_0:
+; ARMEABI-NEXT: .short 0x5300 @ half 56
+; ARMEABI-NEXT: .LCPI13_1:
+; ARMEABI-NEXT: .short 0x54e0 @ half 78
+;
+; THUMBV7A-LABEL: fp16_vminnm_NNNo_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI13_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI13_1
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI13_0:
+; THUMBV7A-NEXT: .short 0x5300 @ half 56
+; THUMBV7A-NEXT: .LCPI13_1:
+; THUMBV7A-NEXT: .short 0x54e0 @ half 78
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast ogt half %0, 56.
@@ -198,12 +441,41 @@ entry:
}
define half @fp16_vminnm_NNNu(i16 signext %b) {
-; CHECK-LABEL: fp16_vminnm_NNNu:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vminnm_NNNu:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, #1.200000e+01
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vldr.16 s2, .LCPI14_0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI14_0:
+; ARMEABI-NEXT: .short 0x5040 @ half 34
+;
+; THUMBV7A-LABEL: fp16_vminnm_NNNu:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, #1.200000e+01
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI14_0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI14_0:
+; THUMBV7A-NEXT: .short 0x5040 @ half 34
entry:
%0 = bitcast i16 %b to half
%cmp1 = fcmp fast ult half 12., %0
@@ -214,12 +486,45 @@ entry:
}
define half @fp16_vminnm_NNNule(i16 signext %b) {
-; CHECK-LABEL: fp16_vminnm_NNNule:
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vminnm_NNNule:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI15_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vldr.16 s2, .LCPI15_1
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI15_0:
+; ARMEABI-NEXT: .short 0x5040 @ half 34
+; ARMEABI-NEXT: .LCPI15_1:
+; ARMEABI-NEXT: .short 0x5300 @ half 56
+;
+; THUMBV7A-LABEL: fp16_vminnm_NNNule:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI15_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI15_1
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI15_0:
+; THUMBV7A-NEXT: .short 0x5040 @ half 34
+; THUMBV7A-NEXT: .LCPI15_1:
+; THUMBV7A-NEXT: .short 0x5300 @ half 56
entry:
%0 = bitcast i16 %b to half
%cmp1 = fcmp fast ule half 34., %0
@@ -230,12 +535,45 @@ entry:
}
define half @fp16_vminnm_NNNu_rev(i16 signext %b) {
-; CHECK-LABEL: fp16_vminnm_NNNu_rev:
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vminnm_NNNu_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI16_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vldr.16 s2, .LCPI16_1
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI16_0:
+; ARMEABI-NEXT: .short 0x5300 @ half 56
+; ARMEABI-NEXT: .LCPI16_1:
+; ARMEABI-NEXT: .short 0x54e0 @ half 78
+;
+; THUMBV7A-LABEL: fp16_vminnm_NNNu_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI16_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI16_1
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI16_0:
+; THUMBV7A-NEXT: .short 0x5300 @ half 56
+; THUMBV7A-NEXT: .LCPI16_1:
+; THUMBV7A-NEXT: .short 0x54e0 @ half 78
entry:
%0 = bitcast i16 %b to half
%cmp1 = fcmp fast ugt half 56., %0
@@ -246,12 +584,41 @@ entry:
}
define half @fp16_vmaxnm_NNNo(i16 signext %a) {
-; CHECK-LABEL: fp16_vmaxnm_NNNo:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vmaxnm_NNNo:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, #1.200000e+01
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vldr.16 s2, .LCPI17_0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI17_0:
+; ARMEABI-NEXT: .short 0x5040 @ half 34
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_NNNo:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, #1.200000e+01
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI17_0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI17_0:
+; THUMBV7A-NEXT: .short 0x5040 @ half 34
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast ogt half %0, 12.
@@ -262,12 +629,45 @@ entry:
}
define half @fp16_vmaxnm_NNNoge(i16 signext %a) {
-; CHECK-LABEL: fp16_vmaxnm_NNNoge:
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vmaxnm_NNNoge:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI18_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s0, s2
+; ARMEABI-NEXT: vldr.16 s2, .LCPI18_1
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI18_0:
+; ARMEABI-NEXT: .short 0x5040 @ half 34
+; ARMEABI-NEXT: .LCPI18_1:
+; ARMEABI-NEXT: .short 0x5300 @ half 56
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_NNNoge:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI18_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s0, s2
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI18_1
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI18_0:
+; THUMBV7A-NEXT: .short 0x5040 @ half 34
+; THUMBV7A-NEXT: .LCPI18_1:
+; THUMBV7A-NEXT: .short 0x5300 @ half 56
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast oge half %0, 34.
@@ -278,12 +678,45 @@ entry:
}
define half @fp16_vmaxnm_NNNo_rev(i16 signext %a) {
-; CHECK-LABEL: fp16_vmaxnm_NNNo_rev:
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vmaxnm_NNNo_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI19_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vldr.16 s2, .LCPI19_1
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI19_0:
+; ARMEABI-NEXT: .short 0x5300 @ half 56
+; ARMEABI-NEXT: .LCPI19_1:
+; ARMEABI-NEXT: .short 0x54e0 @ half 78
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_NNNo_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI19_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI19_1
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI19_0:
+; THUMBV7A-NEXT: .short 0x5300 @ half 56
+; THUMBV7A-NEXT: .LCPI19_1:
+; THUMBV7A-NEXT: .short 0x54e0 @ half 78
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast olt half %0, 56.
@@ -294,12 +727,45 @@ entry:
}
define half @fp16_vmaxnm_NNNole_rev(i16 signext %a) {
-; CHECK-LABEL: fp16_vmaxnm_NNNole_rev:
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vmaxnm_NNNole_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI20_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vldr.16 s2, .LCPI20_1
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI20_0:
+; ARMEABI-NEXT: .short 0x54e0 @ half 78
+; ARMEABI-NEXT: .LCPI20_1:
+; ARMEABI-NEXT: .short 0x55a0 @ half 90
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_NNNole_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI20_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI20_1
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI20_0:
+; THUMBV7A-NEXT: .short 0x54e0 @ half 78
+; THUMBV7A-NEXT: .LCPI20_1:
+; THUMBV7A-NEXT: .short 0x55a0 @ half 90
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast ole half %0, 78.
@@ -310,12 +776,41 @@ entry:
}
define half @fp16_vmaxnm_NNNu(i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_NNNu:
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vmaxnm_NNNu:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vmov.f16 s2, #1.200000e+01
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vldr.16 s2, .LCPI21_0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI21_0:
+; ARMEABI-NEXT: .short 0x5040 @ half 34
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_NNNu:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vmov.f16 s2, #1.200000e+01
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI21_0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI21_0:
+; THUMBV7A-NEXT: .short 0x5040 @ half 34
entry:
%0 = bitcast i16 %b to half
%cmp1 = fcmp fast ugt half 12., %0
@@ -326,12 +821,45 @@ entry:
}
define half @fp16_vmaxnm_NNNuge(i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_NNNuge:
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vmaxnm_NNNuge:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI22_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vldr.16 s2, .LCPI22_1
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI22_0:
+; ARMEABI-NEXT: .short 0x5040 @ half 34
+; ARMEABI-NEXT: .LCPI22_1:
+; ARMEABI-NEXT: .short 0x5300 @ half 56
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_NNNuge:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI22_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI22_1
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI22_0:
+; THUMBV7A-NEXT: .short 0x5040 @ half 34
+; THUMBV7A-NEXT: .LCPI22_1:
+; THUMBV7A-NEXT: .short 0x5300 @ half 56
entry:
%0 = bitcast i16 %b to half
%cmp1 = fcmp fast uge half 34., %0
@@ -342,12 +870,45 @@ entry:
}
define half @fp16_vmaxnm_NNNu_rev(i16 signext %b) {
-; CHECK-LABEL: fp16_vmaxnm_NNNu_rev:
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
-; CHECK: vldr.16 s2, .LCPI{{.*}}
-; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
+; ARMEABI-LABEL: fp16_vmaxnm_NNNu_rev:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI23_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vldr.16 s2, .LCPI23_1
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI23_0:
+; ARMEABI-NEXT: .short 0x5300 @ half 56
+; ARMEABI-NEXT: .LCPI23_1:
+; ARMEABI-NEXT: .short 0x54e0 @ half 78
+;
+; THUMBV7A-LABEL: fp16_vmaxnm_NNNu_rev:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI23_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI23_1
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI23_0:
+; THUMBV7A-NEXT: .short 0x5300 @ half 56
+; THUMBV7A-NEXT: .LCPI23_1:
+; THUMBV7A-NEXT: .short 0x54e0 @ half 78
entry:
%0 = bitcast i16 %b to half
%cmp1 = fcmp fast ult half 56., %0
@@ -358,11 +919,39 @@ entry:
}
define half @fp16_vminmaxnm_0(i16 signext %a) {
-; CHECK-LABEL: fp16_vminmaxnm_0:
-; CHECK: vldr.16 s0, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s2, s2, s0
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminmaxnm_0:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI24_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vcmp.f16 s0, #0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI24_0:
+; ARMEABI-NEXT: .short 0x0000 @ half 0
+;
+; THUMBV7A-LABEL: fp16_vminmaxnm_0:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI24_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vcmp.f16 s0, #0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI24_0:
+; THUMBV7A-NEXT: .short 0x0000 @ half 0
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast olt half %0, 0.
@@ -373,11 +962,39 @@ entry:
}
define half @fp16_vminmaxnm_neg0(i16 signext %a) {
-; CHECK-LABEL: fp16_vminmaxnm_neg0:
-; CHECK: vldr.16 s0, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s2, s2, s0
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminmaxnm_neg0:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI25_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselgt.f16 s0, s0, s2
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI25_0:
+; ARMEABI-NEXT: .short 0x8000 @ half -0
+;
+; THUMBV7A-LABEL: fp16_vminmaxnm_neg0:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI25_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselgt.f16 s0, s0, s2
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI25_0:
+; THUMBV7A-NEXT: .short 0x8000 @ half -0
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast olt half %0, -0.
@@ -388,11 +1005,39 @@ entry:
}
define half @fp16_vminmaxnm_e_0(i16 signext %a) {
-; CHECK-LABEL: fp16_vminmaxnm_e_0:
-; CHECK: vldr.16 s0, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s2, s2, s0
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminmaxnm_e_0:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vldr.16 s2, .LCPI26_0
+; ARMEABI-NEXT: vcmp.f16 s0, #0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI26_0:
+; ARMEABI-NEXT: .short 0x0000 @ half 0
+;
+; THUMBV7A-LABEL: fp16_vminmaxnm_e_0:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI26_0
+; THUMBV7A-NEXT: vcmp.f16 s0, #0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI26_0:
+; THUMBV7A-NEXT: .short 0x0000 @ half 0
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast ule half 0., %0
@@ -403,11 +1048,39 @@ entry:
}
define half @fp16_vminmaxnm_e_neg0(i16 signext %a) {
-; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
-; CHECK: vldr.16 s0, .LCPI{{.*}}
-; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
-; CHECK: vminnm.f16 s2, s2, s0
-; CHECK: vmaxnm.f16 s0, [[S2]], [[S0]]
+; ARMEABI-LABEL: fp16_vminmaxnm_e_neg0:
+; ARMEABI: @ %bb.0: @ %entry
+; ARMEABI-NEXT: vldr.16 s2, .LCPI27_0
+; ARMEABI-NEXT: vmov.f16 s0, r0
+; ARMEABI-NEXT: vcmp.f16 s0, s2
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vcmp.f16 s2, s0
+; ARMEABI-NEXT: vmrs APSR_nzcv, fpscr
+; ARMEABI-NEXT: vselge.f16 s0, s2, s0
+; ARMEABI-NEXT: vmov r0, s0
+; ARMEABI-NEXT: mov pc, lr
+; ARMEABI-NEXT: .p2align 1
+; ARMEABI-NEXT: @ %bb.1:
+; ARMEABI-NEXT: .LCPI27_0:
+; ARMEABI-NEXT: .short 0x8000 @ half -0
+;
+; THUMBV7A-LABEL: fp16_vminmaxnm_e_neg0:
+; THUMBV7A: @ %bb.0: @ %entry
+; THUMBV7A-NEXT: vldr.16 s2, .LCPI27_0
+; THUMBV7A-NEXT: vmov.f16 s0, r0
+; THUMBV7A-NEXT: vcmp.f16 s0, s2
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vcmp.f16 s2, s0
+; THUMBV7A-NEXT: vmrs APSR_nzcv, fpscr
+; THUMBV7A-NEXT: vselge.f16 s0, s2, s0
+; THUMBV7A-NEXT: vmov r0, s0
+; THUMBV7A-NEXT: bx lr
+; THUMBV7A-NEXT: .p2align 1
+; THUMBV7A-NEXT: @ %bb.1:
+; THUMBV7A-NEXT: .LCPI27_0:
+; THUMBV7A-NEXT: .short 0x8000 @ half -0
entry:
%0 = bitcast i16 %a to half
%cmp1 = fcmp fast ule half -0., %0
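; Reviewer note (not part of the patch; register names below are
; placeholders): the scalar fast-math tests in this file now all share
; one shape,
;
;   vcmp.f16   sA, sB            @ compare; flags set in FPSCR
;   vmrs       APSR_nzcv, fpscr  @ transfer flags to APSR
;   vselgt.f16 sD, sX, sY        @ sD = (sA > sB) ? sX : sY
;
; with vselge standing in for the >=-style predicates, so each former
; vminnm/vmaxnm now costs a compare, a flag transfer, and a
; conditional select.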
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 528bfe0411730..76d350a812ea3 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1410,9 +1410,9 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8-LABEL: pr65820:
; ARMV8: @ %bb.0: @ %entry
; ARMV8-NEXT: vmov d16, r2, r3
-; ARMV8-NEXT: vmov.i32 q9, #0x0
; ARMV8-NEXT: vdup.32 q8, d16[0]
-; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9
+; ARMV8-NEXT: vcgt.f32 q9, q8, #0
+; ARMV8-NEXT: vand q8, q8, q9
; ARMV8-NEXT: vst1.32 {d16, d17}, [r0]
; ARMV8-NEXT: bx lr
;
@@ -1422,7 +1422,8 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8M-NEXT: vmov r1, s0
; ARMV8M-NEXT: vmov.i32 q0, #0x0
; ARMV8M-NEXT: vdup.32 q1, r1
-; ARMV8M-NEXT: vmaxnm.f32 q0, q1, q0
+; ARMV8M-NEXT: vcmp.f32 gt, q1, zr
+; ARMV8M-NEXT: vdupt.32 q0, r1
; ARMV8M-NEXT: vstrw.32 q0, [r0]
; ARMV8M-NEXT: bx lr
entry:
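; Reviewer note (not part of the patch): in pr65820 above the select
; computes max(x, +0.0), and since +0.0 is the all-zero bit pattern
; the masked select collapses to an AND:
;
;   vcgt.f32 q9, q8, #0   @ per lane: mask = (x > 0.0) ? ~0 : 0
;   vand     q8, q8, q9   @ keep x where x > 0.0, else +0.0 bits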
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index feb23ea1f3982..37dc969dd5df6 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -1,10 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 | FileCheck %s
; vectors
define <4 x float> @vmaxnmq(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vmaxnmq:
-; CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vmaxnm.f32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x float>, ptr %A
%tmp2 = load <4 x float>, ptr %B
%tmp3 = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
@@ -13,7 +20,12 @@ define <4 x float> @vmaxnmq(ptr %A, ptr %B) nounwind {
define <2 x float> @vmaxnmd(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vmaxnmd:
-; CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vmaxnm.f32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x float>, ptr %A
%tmp2 = load <2 x float>, ptr %B
%tmp3 = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
@@ -22,7 +34,13 @@ define <2 x float> @vmaxnmd(ptr %A, ptr %B) nounwind {
define <4 x float> @vminnmq(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vminnmq:
-; CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vminnm.f32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x float>, ptr %A
%tmp2 = load <4 x float>, ptr %B
%tmp3 = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
@@ -31,7 +49,12 @@ define <4 x float> @vminnmq(ptr %A, ptr %B) nounwind {
define <2 x float> @vminnmd(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vminnmd:
-; CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vminnm.f32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x float>, ptr %A
%tmp2 = load <2 x float>, ptr %B
%tmp3 = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
@@ -40,129 +63,241 @@ define <2 x float> @vminnmd(ptr %A, ptr %B) nounwind {
; scalars
-define float @fp-armv8_vminnm_o(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_o":
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_o(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_o:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp olt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define double @fp-armv8_vminnm_ole(double %a, double %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_ole":
-; CHECK-NOT: vminnm.f64
+define double @fp_armv8_vminnm_ole(double %a, double %b) {
+; CHECK-LABEL: fp_armv8_vminnm_ole:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%cmp = fcmp ole double %a, %b
%cond = select i1 %cmp, double %a, double %b
ret double %cond
}
-define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_o_rev":
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_o_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_o_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp ogt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define double @fp-armv8_vminnm_oge_rev(double %a, double %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_oge_rev":
-; CHECK-NOT: vminnm.f64
+define double @fp_armv8_vminnm_oge_rev(double %a, double %b) {
+; CHECK-LABEL: fp_armv8_vminnm_oge_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%cmp = fcmp oge double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}
-define float @fp-armv8_vminnm_u(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_u":
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_u(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_u:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp ult float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vminnm_ule(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_ule":
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_ule(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_ule:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp ule float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_u_rev":
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_u_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_u_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp ugt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define double @fp-armv8_vminnm_uge_rev(double %a, double %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_uge_rev":
-; CHECK-NOT: vminnm.f64
+define double @fp_armv8_vminnm_uge_rev(double %a, double %b) {
+; CHECK-LABEL: fp_armv8_vminnm_uge_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%cmp = fcmp uge double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}
-define float @fp-armv8_vmaxnm_o(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_o":
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_o(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_o:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp ogt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vmaxnm_oge(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_oge":
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_oge(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_oge:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp oge float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_o_rev":
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_o_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_o_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp olt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define float @fp-armv8_vmaxnm_ole_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_ole_rev":
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_ole_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_ole_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp ole float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define float @fp-armv8_vmaxnm_u(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_u":
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_u(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_u:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp ugt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vmaxnm_uge(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_uge":
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_uge(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_uge:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp uge float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_u_rev":
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_u_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_u_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp ult float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_ule_rev":
-; CHECK-NOT: vmaxnm.f64
+define double @fp_armv8_vmaxnm_ule_rev(double %a, double %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_ule_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%cmp = fcmp ule double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
@@ -170,10 +305,24 @@ define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
; known non-NaNs
-define float @fp-armv8_vminnm_NNNo(float %a) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNo":
-; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_NNNo(float %a) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNo:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
+; CHECK-NEXT: vldr s4, .LCPI20_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI20_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
%cmp1 = fcmp olt float %a, 12.
%cond1 = select i1 %cmp1, float %a, float 12.
%cmp2 = fcmp olt float 34., %cond1
@@ -181,10 +330,28 @@ define float @fp-armv8_vminnm_NNNo(float %a) {
ret float %cond2
}
-define double @fp-armv8_vminnm_NNNole(double %a) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNole":
-; CHECK: vminnm.f64
-; CHECK-NOT: vminnm.f64
+define double @fp_armv8_vminnm_NNNole(double %a) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNole:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI21_0
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vldr d17, .LCPI21_1
+; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI21_0:
+; CHECK-NEXT: .long 0 @ double 34
+; CHECK-NEXT: .long 1078001664
+; CHECK-NEXT: .LCPI21_1:
+; CHECK-NEXT: .long 0 @ double 56
+; CHECK-NEXT: .long 1078722560
%cmp1 = fcmp ole double %a, 34.
%cond1 = select i1 %cmp1, double %a, double 34.
%cmp2 = fcmp ole double 56., %cond1
@@ -192,10 +359,26 @@ define double @fp-armv8_vminnm_NNNole(double %a) {
ret double %cond2
}
-define float @fp-armv8_vminnm_NNNo_rev(float %a) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNo_rev":
-; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_NNNo_rev(float %a) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNo_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI22_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI22_1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI22_0:
+; CHECK-NEXT: .long 0x42600000 @ float 56
+; CHECK-NEXT: .LCPI22_1:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
%cmp1 = fcmp ogt float %a, 56.
%cond1 = select i1 %cmp1, float 56., float %a
%cmp2 = fcmp ogt float 78., %cond1
@@ -203,10 +386,28 @@ define float @fp-armv8_vminnm_NNNo_rev(float %a) {
ret float %cond2
}
-define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNoge_rev":
-; CHECK: vminnm.f64
-; CHECK-NOT: vminnm.f64
+define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNoge_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, .LCPI23_0
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vldr d17, .LCPI23_1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI23_0:
+; CHECK-NEXT: .long 0 @ double 78
+; CHECK-NEXT: .long 1079214080
+; CHECK-NEXT: .LCPI23_1:
+; CHECK-NEXT: .long 0 @ double 90
+; CHECK-NEXT: .long 1079410688
%cmp1 = fcmp oge double %a, 78.
%cond1 = select i1 %cmp1, double 78., double %a
%cmp2 = fcmp oge double 90., %cond1
@@ -214,10 +415,24 @@ define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
ret double %cond2
}
-define float @fp-armv8_vminnm_NNNu(float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNu":
-; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_NNNu(float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNu:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
+; CHECK-NEXT: vldr s4, .LCPI24_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI24_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
%cmp1 = fcmp ult float 12., %b
%cond1 = select i1 %cmp1, float 12., float %b
%cmp2 = fcmp ult float %cond1, 34.
@@ -225,10 +440,26 @@ define float @fp-armv8_vminnm_NNNu(float %b) {
ret float %cond2
}
-define float @fp-armv8_vminnm_NNNule(float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNule":
-; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_NNNule(float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNule:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI25_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI25_1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI25_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
+; CHECK-NEXT: .LCPI25_1:
+; CHECK-NEXT: .long 0x42600000 @ float 56
%cmp1 = fcmp ule float 34., %b
%cond1 = select i1 %cmp1, float 34., float %b
%cmp2 = fcmp ule float %cond1, 56.
@@ -236,10 +467,26 @@ define float @fp-armv8_vminnm_NNNule(float %b) {
ret float %cond2
}
-define float @fp-armv8_vminnm_NNNu_rev(float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNu_rev":
-; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
+define float @fp_armv8_vminnm_NNNu_rev(float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNu_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI26_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI26_1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI26_0:
+; CHECK-NEXT: .long 0x42600000 @ float 56
+; CHECK-NEXT: .LCPI26_1:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
%cmp1 = fcmp ugt float 56., %b
%cond1 = select i1 %cmp1, float %b, float 56.
%cmp2 = fcmp ugt float %cond1, 78.
@@ -247,10 +494,28 @@ define float @fp-armv8_vminnm_NNNu_rev(float %b) {
ret float %cond2
}
-define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNuge_rev":
-; CHECK: vminnm.f64
-; CHECK-NOT: vminnm.f64
+define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNuge_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, .LCPI27_0
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d17
+; CHECK-NEXT: vldr d17, .LCPI27_1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI27_0:
+; CHECK-NEXT: .long 0 @ double 78
+; CHECK-NEXT: .long 1079214080
+; CHECK-NEXT: .LCPI27_1:
+; CHECK-NEXT: .long 0 @ double 90
+; CHECK-NEXT: .long 1079410688
%cmp1 = fcmp uge double 78., %b
%cond1 = select i1 %cmp1, double %b, double 78.
%cmp2 = fcmp uge double %cond1, 90.
@@ -258,10 +523,24 @@ define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
ret double %cond2
}
-define float @fp-armv8_vmaxnm_NNNo(float %a) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo":
-; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNo(float %a) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
+; CHECK-NEXT: vldr s4, .LCPI28_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI28_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
%cmp1 = fcmp ogt float %a, 12.
%cond1 = select i1 %cmp1, float %a, float 12.
%cmp2 = fcmp ogt float 34., %cond1
@@ -269,10 +548,26 @@ define float @fp-armv8_vmaxnm_NNNo(float %a) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNoge(float %a) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNoge":
-; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNoge(float %a) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI29_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI29_1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI29_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
+; CHECK-NEXT: .LCPI29_1:
+; CHECK-NEXT: .long 0x42600000 @ float 56
%cmp1 = fcmp oge float %a, 34.
%cond1 = select i1 %cmp1, float %a, float 34.
%cmp2 = fcmp oge float 56., %cond1
@@ -280,10 +575,26 @@ define float @fp-armv8_vmaxnm_NNNoge(float %a) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
-; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNo_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI30_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI30_1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI30_0:
+; CHECK-NEXT: .long 0x42600000 @ float 56
+; CHECK-NEXT: .LCPI30_1:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
%cmp1 = fcmp olt float %a, 56.
%cond1 = select i1 %cmp1, float 56., float %a
%cmp2 = fcmp olt float 78., %cond1
@@ -291,10 +602,26 @@ define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
-; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNole_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI31_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI31_1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI31_0:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
+; CHECK-NEXT: .LCPI31_1:
+; CHECK-NEXT: .long 0x42b40000 @ float 90
%cmp1 = fcmp ole float %a, 78.
%cond1 = select i1 %cmp1, float 78., float %a
%cmp2 = fcmp ole float 90., %cond1
@@ -302,10 +629,24 @@ define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNu(float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu":
-; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNu(float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
+; CHECK-NEXT: vldr s4, .LCPI32_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI32_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
%cmp1 = fcmp ugt float 12., %b
%cond1 = select i1 %cmp1, float 12., float %b
%cmp2 = fcmp ugt float %cond1, 34.
@@ -313,10 +654,26 @@ define float @fp-armv8_vmaxnm_NNNu(float %b) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNuge(float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNuge":
-; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNuge(float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNuge:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI33_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI33_1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI33_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
+; CHECK-NEXT: .LCPI33_1:
+; CHECK-NEXT: .long 0x42600000 @ float 56
%cmp1 = fcmp uge float 34., %b
%cond1 = select i1 %cmp1, float 34., float %b
%cmp2 = fcmp uge float %cond1, 56.
@@ -324,10 +681,26 @@ define float @fp-armv8_vmaxnm_NNNuge(float %b) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
-; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNu_rev(float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNu_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI34_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI34_1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI34_0:
+; CHECK-NEXT: .long 0x42600000 @ float 56
+; CHECK-NEXT: .LCPI34_1:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
%cmp1 = fcmp ult float 56., %b
%cond1 = select i1 %cmp1, float %b, float 56.
%cmp2 = fcmp ult float %cond1, 78.
@@ -335,10 +708,28 @@ define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
ret float %cond2
}
-define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
-; CHECK: vmaxnm.f64
-; CHECK-NOT: vmaxnm.f64
+define double @fp_armv8_vmaxnm_NNNule_rev( double %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNule_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI35_0
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d17, d16
+; CHECK-NEXT: vldr d17, .LCPI35_1
+; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI35_0:
+; CHECK-NEXT: .long 0 @ double 78
+; CHECK-NEXT: .long 1079214080
+; CHECK-NEXT: .LCPI35_1:
+; CHECK-NEXT: .long 0 @ double 90
+; CHECK-NEXT: .long 1079410688
%cmp1 = fcmp ule double 78., %b
%cond1 = select i1 %cmp1, double %b, double 78.
%cmp2 = fcmp ule double %cond1, 90.
@@ -346,10 +737,24 @@ define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
ret double %cond2
}
-define float @fp-armv8_vminmaxnm_0(float %a) {
-; CHECK-LABEL: "fp-armv8_vminmaxnm_0":
-; CHECK-NOT: vminnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vminmaxnm_0(float %a) {
+; CHECK-LABEL: fp_armv8_vminmaxnm_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s0, .LCPI36_0
+; CHECK-NEXT: vcmp.f32 s2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov.f32 s4, s0
+; CHECK-NEXT: vmovlt.f32 s4, s2
+; CHECK-NEXT: vcmp.f32 s4, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI36_0:
+; CHECK-NEXT: .long 0x00000000 @ float 0
%cmp1 = fcmp ult float %a, 0.
%cond1 = select i1 %cmp1, float %a, float 0.
%cmp2 = fcmp ogt float %cond1, 0.
@@ -357,10 +762,23 @@ define float @fp-armv8_vminmaxnm_0(float %a) {
ret float %cond2
}
-define float @fp-armv8_vminmaxnm_neg0(float %a) {
-; CHECK-LABEL: "fp-armv8_vminmaxnm_neg0":
-; CHECK: vminnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vminmaxnm_neg0(float %a) {
+; CHECK-LABEL: fp_armv8_vminmaxnm_neg0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI37_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s2, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI37_0:
+; CHECK-NEXT: .long 0x80000000 @ float -0
%cmp1 = fcmp olt float %a, -0.
%cond1 = select i1 %cmp1, float %a, float -0.
%cmp2 = fcmp ugt float %cond1, -0.
@@ -368,10 +786,23 @@ define float @fp-armv8_vminmaxnm_neg0(float %a) {
ret float %cond2
}
-define float @fp-armv8_vminmaxnm_e_0(float %a) {
-; CHECK-LABEL: "fp-armv8_vminmaxnm_e_0":
-; CHECK-NOT: vminnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vminmaxnm_e_0(float %a) {
+; CHECK-LABEL: fp_armv8_vminmaxnm_e_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vldr s2, .LCPI38_0
+; CHECK-NEXT: vcmp.f32 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmovle.f32 s0, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI38_0:
+; CHECK-NEXT: .long 0x00000000 @ float 0
%cmp1 = fcmp nsz ole float 0., %a
%cond1 = select i1 %cmp1, float 0., float %a
%cmp2 = fcmp nsz uge float 0., %cond1
@@ -379,10 +810,23 @@ define float @fp-armv8_vminmaxnm_e_0(float %a) {
ret float %cond2
}
-define float @fp-armv8_vminmaxnm_e_neg0(float %a) {
-; CHECK-LABEL: "fp-armv8_vminmaxnm_e_neg0":
-; CHECK: vminnm.f32
-; CHECK-NOT: vmaxnm.f32
+define float @fp_armv8_vminmaxnm_e_neg0(float %a) {
+; CHECK-LABEL: fp_armv8_vminmaxnm_e_neg0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI39_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s2, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI39_0:
+; CHECK-NEXT: .long 0x80000000 @ float -0
%cmp1 = fcmp nsz ule float -0., %a
%cond1 = select i1 %cmp1, float -0., float %a
%cmp2 = fcmp nsz oge float -0., %cond1
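
The unordered cases above are exactly where the dropped fold was unsound without nnan: a select keyed on an unordered compare propagates a NaN first operand, while llvm.minnum returns the other, non-NaN operand. A minimal sketch of the divergence (illustrative, not taken from this patch):

define float @select_ult_keeps_nan(float %a, float %b) {
  ; With %a = NaN the unordered compare is true, so the select returns NaN...
  %cmp = fcmp ult float %a, %b
  %sel = select i1 %cmp, float %a, float %b
  ret float %sel
}

define float @minnum_drops_nan(float %a, float %b) {
  ; ...whereas llvm.minnum returns the non-NaN operand %b for %a = NaN.
  %min = call float @llvm.minnum.f32(float %a, float %b)
  ret float %min
}

declare float @llvm.minnum.f32(float, float)
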
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm.ll b/llvm/test/CodeGen/ARM/vminmaxnm.ll
index a6803fc78d8ce..93e37aa84e9b7 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm.ll
@@ -1,146 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s
; scalars
-define float @fp-armv8_vminnm_o(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_o":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_o(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_o:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast olt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define double @fp-armv8_vminnm_ole(double %a, double %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_ole":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f64
+define double @fp_armv8_vminnm_ole(double %a, double %b) {
+; CHECK-LABEL: fp_armv8_vminnm_ole:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ole double %a, %b
%cond = select i1 %cmp, double %a, double %b
ret double %cond
}
-define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_o_rev":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_o_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_o_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ogt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define double @fp-armv8_vminnm_oge_rev(double %a, double %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_oge_rev":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f64
+define double @fp_armv8_vminnm_oge_rev(double %a, double %b) {
+; CHECK-LABEL: fp_armv8_vminnm_oge_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%cmp = fcmp fast oge double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}
-define float @fp-armv8_vminnm_u(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_u":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_u(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_u:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ult float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vminnm_ule(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_ule":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_ule(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_ule:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ule float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_u_rev":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_u_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_u_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ugt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define double @fp-armv8_vminnm_uge_rev(double %a, double %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_uge_rev":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f64
+define double @fp_armv8_vminnm_uge_rev(double %a, double %b) {
+; CHECK-LABEL: fp_armv8_vminnm_uge_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%cmp = fcmp fast uge double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}
-define float @fp-armv8_vmaxnm_o(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_o":
-; CHECK-NOT: vcmp
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_o(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_o:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ogt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vmaxnm_oge(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_oge":
-; CHECK-NOT: vcmp
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_oge(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_oge:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast oge float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_o_rev":
-; CHECK-NOT: vcmp
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_o_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_o_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast olt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define float @fp-armv8_vmaxnm_ole_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_ole_rev":
-; CHECK-NOT: vcmp
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_ole_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_ole_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ole float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define float @fp-armv8_vmaxnm_u(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_u":
-; CHECK-NOT: vcmp
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_u(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_u:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ugt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vmaxnm_uge(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_uge":
-; CHECK-NOT: vcmp
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_uge(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_uge:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast uge float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
-define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_u_rev":
-; CHECK-NOT: vcmp
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_u_rev(float %a, float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_u_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ult float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
-define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_ule_rev":
-; CHECK-NOT: vcmp
-; CHECK: vmaxnm.f64
+define double @fp_armv8_vmaxnm_ule_rev(double %a, double %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_ule_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%cmp = fcmp fast ule double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
@@ -148,10 +245,24 @@ define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
; known non-NaNs
-define float @fp-armv8_vminnm_NNNo(float %a) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNo":
-; CHECK: vminnm.f32
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_NNNo(float %a) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNo:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
+; CHECK-NEXT: vldr s4, .LCPI16_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI16_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
%cmp1 = fcmp fast olt float %a, 12.
%cond1 = select i1 %cmp1, float %a, float 12.
%cmp2 = fcmp fast olt float 34., %cond1
@@ -159,10 +270,28 @@ define float @fp-armv8_vminnm_NNNo(float %a) {
ret float %cond2
}
-define double @fp-armv8_vminnm_NNNole(double %a) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNole":
-; CHECK: vminnm.f64
-; CHECK: vminnm.f64
+define double @fp_armv8_vminnm_NNNole(double %a) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNole:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI17_0
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vldr d17, .LCPI17_1
+; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI17_0:
+; CHECK-NEXT: .long 0 @ double 34
+; CHECK-NEXT: .long 1078001664
+; CHECK-NEXT: .LCPI17_1:
+; CHECK-NEXT: .long 0 @ double 56
+; CHECK-NEXT: .long 1078722560
%cmp1 = fcmp fast ole double %a, 34.
%cond1 = select i1 %cmp1, double %a, double 34.
%cmp2 = fcmp fast ole double 56., %cond1
@@ -170,10 +299,26 @@ define double @fp-armv8_vminnm_NNNole(double %a) {
ret double %cond2
}
-define float @fp-armv8_vminnm_NNNo_rev(float %a) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNo_rev":
-; CHECK: vminnm.f32
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_NNNo_rev(float %a) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNo_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI18_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI18_1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI18_0:
+; CHECK-NEXT: .long 0x42600000 @ float 56
+; CHECK-NEXT: .LCPI18_1:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
%cmp1 = fcmp fast ogt float %a, 56.
%cond1 = select i1 %cmp1, float 56., float %a
%cmp2 = fcmp fast ogt float 78., %cond1
@@ -181,10 +326,28 @@ define float @fp-armv8_vminnm_NNNo_rev(float %a) {
ret float %cond2
}
-define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNoge_rev":
-; CHECK: vminnm.f64
-; CHECK: vminnm.f64
+define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNoge_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, .LCPI19_0
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vldr d17, .LCPI19_1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI19_0:
+; CHECK-NEXT: .long 0 @ double 78
+; CHECK-NEXT: .long 1079214080
+; CHECK-NEXT: .LCPI19_1:
+; CHECK-NEXT: .long 0 @ double 90
+; CHECK-NEXT: .long 1079410688
%cmp1 = fcmp fast oge double %a, 78.
%cond1 = select i1 %cmp1, double 78., double %a
%cmp2 = fcmp fast oge double 90., %cond1
@@ -192,10 +355,24 @@ define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
ret double %cond2
}
-define float @fp-armv8_vminnm_NNNu(float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNu":
-; CHECK: vminnm.f32
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_NNNu(float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNu:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
+; CHECK-NEXT: vldr s4, .LCPI20_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI20_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
%cmp1 = fcmp fast ult float 12., %b
%cond1 = select i1 %cmp1, float 12., float %b
%cmp2 = fcmp fast ult float %cond1, 34.
@@ -203,10 +380,26 @@ define float @fp-armv8_vminnm_NNNu(float %b) {
ret float %cond2
}
-define float @fp-armv8_vminnm_NNNule(float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNule":
-; CHECK: vminnm.f32
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_NNNule(float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNule:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI21_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI21_1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI21_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
+; CHECK-NEXT: .LCPI21_1:
+; CHECK-NEXT: .long 0x42600000 @ float 56
%cmp1 = fcmp fast ule float 34., %b
%cond1 = select i1 %cmp1, float 34., float %b
%cmp2 = fcmp fast ule float %cond1, 56.
@@ -214,10 +407,26 @@ define float @fp-armv8_vminnm_NNNule(float %b) {
ret float %cond2
}
-define float @fp-armv8_vminnm_NNNu_rev(float %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNu_rev":
-; CHECK: vminnm.f32
-; CHECK: vminnm.f32
+define float @fp_armv8_vminnm_NNNu_rev(float %b) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNu_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI22_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI22_1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI22_0:
+; CHECK-NEXT: .long 0x42600000 @ float 56
+; CHECK-NEXT: .LCPI22_1:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
%cmp1 = fcmp fast ugt float 56., %b
%cond1 = select i1 %cmp1, float %b, float 56.
%cmp2 = fcmp fast ugt float %cond1, 78.
@@ -225,10 +434,28 @@ define float @fp-armv8_vminnm_NNNu_rev(float %b) {
ret float %cond2
}
-define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
-; CHECK-LABEL: "fp-armv8_vminnm_NNNuge_rev":
-; CHECK: vminnm.f64
-; CHECK: vminnm.f64
+define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
+; CHECK-LABEL: fp_armv8_vminnm_NNNuge_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI23_0
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vldr d17, .LCPI23_1
+; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI23_0:
+; CHECK-NEXT: .long 0 @ double 78
+; CHECK-NEXT: .long 1079214080
+; CHECK-NEXT: .LCPI23_1:
+; CHECK-NEXT: .long 0 @ double 90
+; CHECK-NEXT: .long 1079410688
%cmp1 = fcmp fast uge double 78., %b
%cond1 = select i1 %cmp1, double %b, double 78.
%cmp2 = fcmp fast uge double %cond1, 90.
@@ -236,10 +463,24 @@ define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
ret double %cond2
}
-define float @fp-armv8_vmaxnm_NNNo(float %a) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo":
-; CHECK: vmaxnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNo(float %a) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
+; CHECK-NEXT: vldr s4, .LCPI24_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI24_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
%cmp1 = fcmp fast ogt float %a, 12.
%cond1 = select i1 %cmp1, float %a, float 12.
%cmp2 = fcmp fast ogt float 34., %cond1
@@ -247,10 +488,26 @@ define float @fp-armv8_vmaxnm_NNNo(float %a) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNoge(float %a) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNoge":
-; CHECK: vmaxnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNoge(float %a) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI25_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI25_1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI25_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
+; CHECK-NEXT: .LCPI25_1:
+; CHECK-NEXT: .long 0x42600000 @ float 56
%cmp1 = fcmp fast oge float %a, 34.
%cond1 = select i1 %cmp1, float %a, float 34.
%cmp2 = fcmp fast oge float 56., %cond1
@@ -258,10 +515,26 @@ define float @fp-armv8_vmaxnm_NNNoge(float %a) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
-; CHECK: vmaxnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNo_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI26_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI26_1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI26_0:
+; CHECK-NEXT: .long 0x42600000 @ float 56
+; CHECK-NEXT: .LCPI26_1:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
%cmp1 = fcmp fast olt float %a, 56.
%cond1 = select i1 %cmp1, float 56., float %a
%cmp2 = fcmp fast olt float 78., %cond1
@@ -269,10 +542,26 @@ define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
-; CHECK: vmaxnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNole_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI27_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI27_1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI27_0:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
+; CHECK-NEXT: .LCPI27_1:
+; CHECK-NEXT: .long 0x42b40000 @ float 90
%cmp1 = fcmp fast ole float %a, 78.
%cond1 = select i1 %cmp1, float 78., float %a
%cmp2 = fcmp fast ole float 90., %cond1
@@ -280,10 +569,24 @@ define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNu(float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu":
-; CHECK: vmaxnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNu(float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
+; CHECK-NEXT: vldr s4, .LCPI28_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI28_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
%cmp1 = fcmp fast ugt float 12., %b
%cond1 = select i1 %cmp1, float 12., float %b
%cmp2 = fcmp fast ugt float %cond1, 34.
@@ -291,10 +594,26 @@ define float @fp-armv8_vmaxnm_NNNu(float %b) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNuge(float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNuge":
-; CHECK: vmaxnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNuge(float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNuge:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI29_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI29_1
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI29_0:
+; CHECK-NEXT: .long 0x42080000 @ float 34
+; CHECK-NEXT: .LCPI29_1:
+; CHECK-NEXT: .long 0x42600000 @ float 56
%cmp1 = fcmp fast uge float 34., %b
%cond1 = select i1 %cmp1, float 34., float %b
%cmp2 = fcmp fast uge float %cond1, 56.
@@ -302,10 +621,26 @@ define float @fp-armv8_vmaxnm_NNNuge(float %b) {
ret float %cond2
}
-define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
-; CHECK: vmaxnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vmaxnm_NNNu_rev(float %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNu_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI30_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI30_1
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI30_0:
+; CHECK-NEXT: .long 0x42600000 @ float 56
+; CHECK-NEXT: .LCPI30_1:
+; CHECK-NEXT: .long 0x429c0000 @ float 78
%cmp1 = fcmp fast ult float 56., %b
%cond1 = select i1 %cmp1, float %b, float 56.
%cmp2 = fcmp fast ult float %cond1, 78.
@@ -313,10 +648,28 @@ define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
ret float %cond2
}
-define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
-; CHECK-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
-; CHECK: vmaxnm.f64
-; CHECK: vmaxnm.f64
+define double @fp_armv8_vmaxnm_NNNule_rev( double %b) {
+; CHECK-LABEL: fp_armv8_vmaxnm_NNNule_rev:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, .LCPI31_0
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vldr d17, .LCPI31_1
+; CHECK-NEXT: vcmp.f64 d17, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI31_0:
+; CHECK-NEXT: .long 0 @ double 78
+; CHECK-NEXT: .long 1079214080
+; CHECK-NEXT: .LCPI31_1:
+; CHECK-NEXT: .long 0 @ double 90
+; CHECK-NEXT: .long 1079410688
%cmp1 = fcmp fast ule double 78., %b
%cond1 = select i1 %cmp1, double %b, double 78.
%cmp2 = fcmp fast ule double %cond1, 90.
@@ -324,11 +677,24 @@ define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
ret double %cond2
}
-define float @fp-armv8_vminmaxnm_0(float %a) {
-; CHECK-LABEL: "fp-armv8_vminmaxnm_0":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vminmaxnm_0(float %a) {
+; CHECK-LABEL: fp_armv8_vminmaxnm_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s0, .LCPI32_0
+; CHECK-NEXT: vcmp.f32 s2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov.f32 s4, s0
+; CHECK-NEXT: vmovlt.f32 s4, s2
+; CHECK-NEXT: vcmp.f32 s4, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI32_0:
+; CHECK-NEXT: .long 0x00000000 @ float 0
%cmp1 = fcmp fast olt float %a, 0.
%cond1 = select i1 %cmp1, float %a, float 0.
%cmp2 = fcmp fast ogt float %cond1, 0.
@@ -336,11 +702,23 @@ define float @fp-armv8_vminmaxnm_0(float %a) {
ret float %cond2
}
-define float @fp-armv8_vminmaxnm_neg0(float %a) {
-; CHECK-LABEL: "fp-armv8_vminmaxnm_neg0":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vminmaxnm_neg0(float %a) {
+; CHECK-LABEL: fp_armv8_vminmaxnm_neg0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI33_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s2, s2, s0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI33_0:
+; CHECK-NEXT: .long 0x80000000 @ float -0
%cmp1 = fcmp fast olt float %a, -0.
%cond1 = select i1 %cmp1, float %a, float -0.
%cmp2 = fcmp fast ugt float %cond1, -0.
@@ -348,11 +726,23 @@ define float @fp-armv8_vminmaxnm_neg0(float %a) {
ret float %cond2
}
-define float @fp-armv8_vminmaxnm_e_0(float %a) {
-; CHECK-LABEL: "fp-armv8_vminmaxnm_e_0":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vminmaxnm_e_0(float %a) {
+; CHECK-LABEL: fp_armv8_vminmaxnm_e_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vldr s2, .LCPI34_0
+; CHECK-NEXT: vcmp.f32 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmovle.f32 s0, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI34_0:
+; CHECK-NEXT: .long 0x00000000 @ float 0
%cmp1 = fcmp fast ule float 0., %a
%cond1 = select i1 %cmp1, float 0., float %a
%cmp2 = fcmp fast uge float 0., %cond1
@@ -360,19 +750,26 @@ define float @fp-armv8_vminmaxnm_e_0(float %a) {
ret float %cond2
}
-define float @fp-armv8_vminmaxnm_e_neg0(float %a) {
-; CHECK-LABEL: "fp-armv8_vminmaxnm_e_neg0":
-; CHECK-NOT: vcmp
-; CHECK: vminnm.f32
-; CHECK: vmaxnm.f32
+define float @fp_armv8_vminmaxnm_e_neg0(float %a) {
+; CHECK-LABEL: fp_armv8_vminmaxnm_e_neg0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, .LCPI35_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s2, s0, s2
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI35_0:
+; CHECK-NEXT: .long 0x80000000 @ float -0
%cmp1 = fcmp fast ule float -0., %a
%cond1 = select i1 %cmp1, float -0., float %a
%cmp2 = fcmp fast oge float -0., %cond1
%cond2 = select i1 %cmp2, float -0., float %cond1
ret float %cond2
}
-
-declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
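For context on the test churn above and below, here is a minimal IR sketch of the signed-zero hazard that motivates dropping the fold (illustrative only, not part of the patch; the function name is made up): with %x = -0.0, "fcmp olt float %x, 0.0" is false because +0.0 and -0.0 compare equal, so the select returns +0.0, whereas an fmin is allowed to return -0.0. The select to fmin/fmax rewrite is therefore only sound when nsz (or fast) is present on both the fcmp and the select.

define float @signed_zero_sketch(float %x) {
  %cmp = fcmp olt float %x, 0.0                ; false when %x == -0.0
  %sel = select i1 %cmp, float %x, float 0.0   ; yields +0.0 for %x == -0.0
  ret float %sel                               ; fmin(%x, 0.0) could yield -0.0
}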
diff --git a/llvm/test/CodeGen/PowerPC/vec-min-max.ll b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
index db83fd187bed3..ff8f0c1737525 100644
--- a/llvm/test/CodeGen/PowerPC/vec-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
@@ -91,12 +91,14 @@ entry:
define <4 x float> @getsmaxf32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: getsmaxf32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmaxsp 34, 34, 35
+; CHECK-NEXT: xvcmpgesp 0, 34, 35
+; CHECK-NEXT: xxsel 34, 35, 34, 0
; CHECK-NEXT: blr
;
; NOP8VEC-LABEL: getsmaxf32:
; NOP8VEC: # %bb.0: # %entry
-; NOP8VEC-NEXT: xvmaxsp 34, 34, 35
+; NOP8VEC-NEXT: xvcmpgesp 0, 34, 35
+; NOP8VEC-NEXT: xxsel 34, 35, 34, 0
; NOP8VEC-NEXT: blr
entry:
%0 = fcmp nnan nsz oge <4 x float> %a, %b
@@ -107,12 +109,14 @@ entry:
define <2 x double> @getsmaxf64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: getsmaxf64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmaxdp 34, 34, 35
+; CHECK-NEXT: xvcmpgedp 36, 34, 35
+; CHECK-NEXT: xxsel 34, 35, 34, 36
; CHECK-NEXT: blr
;
; NOP8VEC-LABEL: getsmaxf64:
; NOP8VEC: # %bb.0: # %entry
-; NOP8VEC-NEXT: xvmaxdp 34, 34, 35
+; NOP8VEC-NEXT: xvcmpgedp 0, 34, 35
+; NOP8VEC-NEXT: xxsel 34, 35, 34, 0
; NOP8VEC-NEXT: blr
entry:
%0 = fcmp nnan nsz oge <2 x double> %a, %b
@@ -208,12 +212,14 @@ entry:
define <4 x float> @getsminf32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: getsminf32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvminsp 34, 34, 35
+; CHECK-NEXT: xvcmpgesp 0, 35, 34
+; CHECK-NEXT: xxsel 34, 35, 34, 0
; CHECK-NEXT: blr
;
; NOP8VEC-LABEL: getsminf32:
; NOP8VEC: # %bb.0: # %entry
-; NOP8VEC-NEXT: xvminsp 34, 34, 35
+; NOP8VEC-NEXT: xvcmpgesp 0, 35, 34
+; NOP8VEC-NEXT: xxsel 34, 35, 34, 0
; NOP8VEC-NEXT: blr
entry:
%0 = fcmp nnan nsz ole <4 x float> %a, %b
@@ -224,12 +230,14 @@ entry:
define <2 x double> @getsminf64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: getsminf64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmindp 34, 34, 35
+; CHECK-NEXT: xvcmpgedp 36, 35, 34
+; CHECK-NEXT: xxsel 34, 35, 34, 36
; CHECK-NEXT: blr
;
; NOP8VEC-LABEL: getsminf64:
; NOP8VEC: # %bb.0: # %entry
-; NOP8VEC-NEXT: xvmindp 34, 34, 35
+; NOP8VEC-NEXT: xvcmpgedp 0, 35, 34
+; NOP8VEC-NEXT: xxsel 34, 35, 34, 0
; NOP8VEC-NEXT: blr
entry:
%0 = fcmp nnan nsz ole <2 x double> %a, %b
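A schematic of what the new PowerPC sequences compute (an illustrative IR sketch, not taken from the patch): xvcmpgtdp/xvcmpgtsp produce a per-lane all-ones/all-zeros mask, and xxsel then picks the second source lane where the mask is set and the first where it is clear, so each former xvmaxdp/xvmindp becomes the generic compare-plus-select pair seen in the diffs.

define <2 x double> @vmax_sketch(<2 x double> %a, <2 x double> %b) {
  %mask = fcmp fast ogt <2 x double> %a, %b                        ; xvcmpgtdp
  %max = select <2 x i1> %mask, <2 x double> %a, <2 x double> %b   ; xxsel
  ret <2 x double> %max
}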
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-fmax.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-fmax.ll
index b1f72f694aea5..4ffbf64eb03b6 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-fmax.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-fmax.ll
@@ -379,51 +379,71 @@ entry:
define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: xvmaxsp vs0, v3, v5
-; PWR9LE-NEXT: xvmaxsp vs1, v2, v4
-; PWR9LE-NEXT: xvmaxsp vs0, vs1, vs0
-; PWR9LE-NEXT: xxswapd v2, vs0
-; PWR9LE-NEXT: xvmaxsp vs0, vs0, v2
+; PWR9LE-NEXT: xvcmpgtsp vs0, v2, v4
+; PWR9LE-NEXT: xvcmpgtsp vs1, v3, v5
+; PWR9LE-NEXT: xxsel vs1, v5, v3, vs1
+; PWR9LE-NEXT: xxsel vs0, v4, v2, vs0
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
+; PWR9LE-NEXT: xxswapd vs1, vs0
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
-; PWR9LE-NEXT: xvmaxsp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: xvmaxsp vs0, v3, v5
-; PWR9BE-NEXT: xvmaxsp vs1, v2, v4
-; PWR9BE-NEXT: xvmaxsp vs0, vs1, vs0
-; PWR9BE-NEXT: xxswapd v2, vs0
-; PWR9BE-NEXT: xvmaxsp vs0, vs0, v2
+; PWR9BE-NEXT: xvcmpgtsp vs0, v2, v4
+; PWR9BE-NEXT: xvcmpgtsp vs1, v3, v5
+; PWR9BE-NEXT: xxsel vs1, v5, v3, vs1
+; PWR9BE-NEXT: xxsel vs0, v4, v2, vs0
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
+; PWR9BE-NEXT: xxswapd vs1, vs0
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
-; PWR9BE-NEXT: xvmaxsp vs0, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: xvmaxsp vs0, v3, v5
-; PWR10LE-NEXT: xvmaxsp vs1, v2, v4
-; PWR10LE-NEXT: xvmaxsp vs0, vs1, vs0
-; PWR10LE-NEXT: xxswapd v2, vs0
-; PWR10LE-NEXT: xvmaxsp vs0, vs0, v2
+; PWR10LE-NEXT: xvcmpgtsp vs0, v2, v4
+; PWR10LE-NEXT: xvcmpgtsp vs1, v3, v5
+; PWR10LE-NEXT: xxsel vs1, v5, v3, vs1
+; PWR10LE-NEXT: xxsel vs0, v4, v2, vs0
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
+; PWR10LE-NEXT: xxswapd vs1, vs0
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
-; PWR10LE-NEXT: xvmaxsp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: xvmaxsp vs0, v3, v5
-; PWR10BE-NEXT: xvmaxsp vs1, v2, v4
-; PWR10BE-NEXT: xvmaxsp vs0, vs1, vs0
-; PWR10BE-NEXT: xxswapd v2, vs0
-; PWR10BE-NEXT: xvmaxsp vs0, vs0, v2
+; PWR10BE-NEXT: xvcmpgtsp vs0, v2, v4
+; PWR10BE-NEXT: xvcmpgtsp vs1, v3, v5
+; PWR10BE-NEXT: xxsel vs1, v5, v3, vs1
+; PWR10BE-NEXT: xxsel vs0, v4, v2, vs0
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
+; PWR10BE-NEXT: xxswapd vs1, vs0
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
-; PWR10BE-NEXT: xvmaxsp vs0, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
@@ -523,67 +543,103 @@ entry:
define dso_local float @v32f32_fast(<32 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f32_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: xvmaxsp vs0, v4, v8
-; PWR9LE-NEXT: xvmaxsp vs1, v2, v6
-; PWR9LE-NEXT: xvmaxsp vs2, v5, v9
-; PWR9LE-NEXT: xvmaxsp vs3, v3, v7
-; PWR9LE-NEXT: xvmaxsp vs2, vs3, vs2
-; PWR9LE-NEXT: xvmaxsp vs0, vs1, vs0
-; PWR9LE-NEXT: xvmaxsp vs0, vs0, vs2
-; PWR9LE-NEXT: xxswapd v2, vs0
-; PWR9LE-NEXT: xvmaxsp vs0, vs0, v2
+; PWR9LE-NEXT: xvcmpgtsp vs0, v2, v6
+; PWR9LE-NEXT: xvcmpgtsp vs1, v4, v8
+; PWR9LE-NEXT: xvcmpgtsp vs2, v3, v7
+; PWR9LE-NEXT: xvcmpgtsp vs3, v5, v9
+; PWR9LE-NEXT: xxsel vs3, v9, v5, vs3
+; PWR9LE-NEXT: xxsel vs2, v7, v3, vs2
+; PWR9LE-NEXT: xxsel vs1, v8, v4, vs1
+; PWR9LE-NEXT: xxsel vs0, v6, v2, vs0
+; PWR9LE-NEXT: xvcmpgtsp vs4, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtsp vs5, vs2, vs3
+; PWR9LE-NEXT: xxsel vs2, vs3, vs2, vs5
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs4
+; PWR9LE-NEXT: xvcmpgtsp vs1, vs0, vs2
+; PWR9LE-NEXT: xxsel vs0, vs2, vs0, vs1
+; PWR9LE-NEXT: xxswapd vs1, vs0
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
-; PWR9LE-NEXT: xvmaxsp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f32_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: xvmaxsp vs0, v4, v8
-; PWR9BE-NEXT: xvmaxsp vs1, v2, v6
-; PWR9BE-NEXT: xvmaxsp vs2, v5, v9
-; PWR9BE-NEXT: xvmaxsp vs3, v3, v7
-; PWR9BE-NEXT: xvmaxsp vs2, vs3, vs2
-; PWR9BE-NEXT: xvmaxsp vs0, vs1, vs0
-; PWR9BE-NEXT: xvmaxsp vs0, vs0, vs2
-; PWR9BE-NEXT: xxswapd v2, vs0
-; PWR9BE-NEXT: xvmaxsp vs0, vs0, v2
+; PWR9BE-NEXT: xvcmpgtsp vs0, v2, v6
+; PWR9BE-NEXT: xvcmpgtsp vs1, v4, v8
+; PWR9BE-NEXT: xvcmpgtsp vs2, v3, v7
+; PWR9BE-NEXT: xvcmpgtsp vs3, v5, v9
+; PWR9BE-NEXT: xxsel vs3, v9, v5, vs3
+; PWR9BE-NEXT: xxsel vs2, v7, v3, vs2
+; PWR9BE-NEXT: xxsel vs1, v8, v4, vs1
+; PWR9BE-NEXT: xxsel vs0, v6, v2, vs0
+; PWR9BE-NEXT: xvcmpgtsp vs4, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtsp vs5, vs2, vs3
+; PWR9BE-NEXT: xxsel vs2, vs3, vs2, vs5
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs4
+; PWR9BE-NEXT: xvcmpgtsp vs1, vs0, vs2
+; PWR9BE-NEXT: xxsel vs0, vs2, vs0, vs1
+; PWR9BE-NEXT: xxswapd vs1, vs0
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
-; PWR9BE-NEXT: xvmaxsp vs0, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f32_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: xvmaxsp vs0, v4, v8
-; PWR10LE-NEXT: xvmaxsp vs1, v2, v6
-; PWR10LE-NEXT: xvmaxsp vs2, v5, v9
-; PWR10LE-NEXT: xvmaxsp vs3, v3, v7
-; PWR10LE-NEXT: xvmaxsp vs2, vs3, vs2
-; PWR10LE-NEXT: xvmaxsp vs0, vs1, vs0
-; PWR10LE-NEXT: xvmaxsp vs0, vs0, vs2
-; PWR10LE-NEXT: xxswapd v2, vs0
-; PWR10LE-NEXT: xvmaxsp vs0, vs0, v2
+; PWR10LE-NEXT: xvcmpgtsp vs0, v2, v6
+; PWR10LE-NEXT: xvcmpgtsp vs1, v4, v8
+; PWR10LE-NEXT: xvcmpgtsp vs2, v3, v7
+; PWR10LE-NEXT: xvcmpgtsp vs3, v5, v9
+; PWR10LE-NEXT: xxsel vs3, v9, v5, vs3
+; PWR10LE-NEXT: xxsel vs2, v7, v3, vs2
+; PWR10LE-NEXT: xxsel vs1, v8, v4, vs1
+; PWR10LE-NEXT: xxsel vs0, v6, v2, vs0
+; PWR10LE-NEXT: xvcmpgtsp vs4, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtsp vs5, vs2, vs3
+; PWR10LE-NEXT: xxsel vs2, vs3, vs2, vs5
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs4
+; PWR10LE-NEXT: xvcmpgtsp vs1, vs0, vs2
+; PWR10LE-NEXT: xxsel vs0, vs2, vs0, vs1
+; PWR10LE-NEXT: xxswapd vs1, vs0
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
-; PWR10LE-NEXT: xvmaxsp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f32_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: xvmaxsp vs0, v4, v8
-; PWR10BE-NEXT: xvmaxsp vs1, v2, v6
-; PWR10BE-NEXT: xvmaxsp vs2, v5, v9
-; PWR10BE-NEXT: xvmaxsp vs3, v3, v7
-; PWR10BE-NEXT: xvmaxsp vs2, vs3, vs2
-; PWR10BE-NEXT: xvmaxsp vs0, vs1, vs0
-; PWR10BE-NEXT: xvmaxsp vs0, vs0, vs2
-; PWR10BE-NEXT: xxswapd v2, vs0
-; PWR10BE-NEXT: xvmaxsp vs0, vs0, v2
+; PWR10BE-NEXT: xvcmpgtsp vs0, v2, v6
+; PWR10BE-NEXT: xvcmpgtsp vs1, v4, v8
+; PWR10BE-NEXT: xvcmpgtsp vs2, v3, v7
+; PWR10BE-NEXT: xvcmpgtsp vs3, v5, v9
+; PWR10BE-NEXT: xxsel vs3, v9, v5, vs3
+; PWR10BE-NEXT: xxsel vs2, v7, v3, vs2
+; PWR10BE-NEXT: xxsel vs1, v8, v4, vs1
+; PWR10BE-NEXT: xxsel vs0, v6, v2, vs0
+; PWR10BE-NEXT: xvcmpgtsp vs4, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtsp vs5, vs2, vs3
+; PWR10BE-NEXT: xxsel vs2, vs3, vs2, vs5
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs4
+; PWR10BE-NEXT: xvcmpgtsp vs1, vs0, vs2
+; PWR10BE-NEXT: xxsel vs0, vs2, vs0, vs1
+; PWR10BE-NEXT: xxswapd vs1, vs0
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
-; PWR10BE-NEXT: xvmaxsp vs0, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
@@ -780,43 +836,59 @@ entry:
define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: xvmaxdp vs0, v3, v5
-; PWR9LE-NEXT: xvmaxdp vs1, v2, v4
-; PWR9LE-NEXT: xvmaxdp vs0, vs1, vs0
+; PWR9LE-NEXT: xvcmpgtdp v0, v2, v4
+; PWR9LE-NEXT: xvcmpgtdp v1, v3, v5
+; PWR9LE-NEXT: xxsel vs0, v5, v3, v1
+; PWR9LE-NEXT: xxsel vs1, v4, v2, v0
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
-; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: xvmaxdp vs0, v3, v5
-; PWR9BE-NEXT: xvmaxdp vs1, v2, v4
-; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0
+; PWR9BE-NEXT: xvcmpgtdp v0, v2, v4
+; PWR9BE-NEXT: xvcmpgtdp v1, v3, v5
+; PWR9BE-NEXT: xxsel vs0, v5, v3, v1
+; PWR9BE-NEXT: xxsel vs1, v4, v2, v0
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR9BE-NEXT: xxswapd vs1, vs0
-; PWR9BE-NEXT: xvmaxdp vs1, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: xvmaxdp vs0, v3, v5
-; PWR10LE-NEXT: xvmaxdp vs1, v2, v4
-; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0
+; PWR10LE-NEXT: xvcmpgtdp v0, v2, v4
+; PWR10LE-NEXT: xvcmpgtdp v1, v3, v5
+; PWR10LE-NEXT: xxsel vs0, v5, v3, v1
+; PWR10LE-NEXT: xxsel vs1, v4, v2, v0
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
-; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: xvmaxdp vs0, v3, v5
-; PWR10BE-NEXT: xvmaxdp vs1, v2, v4
-; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0
+; PWR10BE-NEXT: xvcmpgtdp v0, v2, v4
+; PWR10BE-NEXT: xvcmpgtdp v1, v3, v5
+; PWR10BE-NEXT: xxsel vs0, v5, v3, v1
+; PWR10BE-NEXT: xxsel vs1, v4, v2, v0
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR10BE-NEXT: xxswapd vs1, vs0
-; PWR10BE-NEXT: xvmaxdp vs1, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
@@ -884,59 +956,91 @@ entry:
define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: xvmaxdp vs0, v4, v8
-; PWR9LE-NEXT: xvmaxdp vs1, v2, v6
-; PWR9LE-NEXT: xvmaxdp vs2, v5, v9
-; PWR9LE-NEXT: xvmaxdp vs3, v3, v7
-; PWR9LE-NEXT: xvmaxdp vs2, vs3, vs2
-; PWR9LE-NEXT: xvmaxdp vs0, vs1, vs0
-; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs2
+; PWR9LE-NEXT: xvcmpgtdp v0, v2, v6
+; PWR9LE-NEXT: xvcmpgtdp v1, v4, v8
+; PWR9LE-NEXT: xvcmpgtdp v10, v3, v7
+; PWR9LE-NEXT: xvcmpgtdp v11, v5, v9
+; PWR9LE-NEXT: xxsel vs0, v9, v5, v11
+; PWR9LE-NEXT: xxsel vs1, v7, v3, v10
+; PWR9LE-NEXT: xxsel vs2, v8, v4, v1
+; PWR9LE-NEXT: xxsel vs3, v6, v2, v0
+; PWR9LE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR9LE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs0, vs1, v3
+; PWR9LE-NEXT: xxsel vs1, vs2, vs3, v2
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
-; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: xvmaxdp vs0, v4, v8
-; PWR9BE-NEXT: xvmaxdp vs1, v2, v6
-; PWR9BE-NEXT: xvmaxdp vs2, v5, v9
-; PWR9BE-NEXT: xvmaxdp vs3, v3, v7
-; PWR9BE-NEXT: xvmaxdp vs2, vs3, vs2
-; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0
-; PWR9BE-NEXT: xvmaxdp vs0, vs0, vs2
+; PWR9BE-NEXT: xvcmpgtdp v0, v2, v6
+; PWR9BE-NEXT: xvcmpgtdp v1, v4, v8
+; PWR9BE-NEXT: xvcmpgtdp v10, v3, v7
+; PWR9BE-NEXT: xvcmpgtdp v11, v5, v9
+; PWR9BE-NEXT: xxsel vs0, v9, v5, v11
+; PWR9BE-NEXT: xxsel vs1, v7, v3, v10
+; PWR9BE-NEXT: xxsel vs2, v8, v4, v1
+; PWR9BE-NEXT: xxsel vs3, v6, v2, v0
+; PWR9BE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR9BE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs0, vs1, v3
+; PWR9BE-NEXT: xxsel vs1, vs2, vs3, v2
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR9BE-NEXT: xxswapd vs1, vs0
-; PWR9BE-NEXT: xvmaxdp vs1, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: xvmaxdp vs0, v4, v8
-; PWR10LE-NEXT: xvmaxdp vs1, v2, v6
-; PWR10LE-NEXT: xvmaxdp vs2, v5, v9
-; PWR10LE-NEXT: xvmaxdp vs3, v3, v7
-; PWR10LE-NEXT: xvmaxdp vs2, vs3, vs2
-; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0
-; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs2
+; PWR10LE-NEXT: xvcmpgtdp v0, v2, v6
+; PWR10LE-NEXT: xvcmpgtdp v1, v4, v8
+; PWR10LE-NEXT: xvcmpgtdp v10, v3, v7
+; PWR10LE-NEXT: xvcmpgtdp v11, v5, v9
+; PWR10LE-NEXT: xxsel vs0, v9, v5, v11
+; PWR10LE-NEXT: xxsel vs1, v7, v3, v10
+; PWR10LE-NEXT: xxsel vs2, v8, v4, v1
+; PWR10LE-NEXT: xxsel vs3, v6, v2, v0
+; PWR10LE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR10LE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs0, vs1, v3
+; PWR10LE-NEXT: xxsel vs1, vs2, vs3, v2
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
-; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: xvmaxdp vs0, v4, v8
-; PWR10BE-NEXT: xvmaxdp vs1, v2, v6
-; PWR10BE-NEXT: xvmaxdp vs2, v5, v9
-; PWR10BE-NEXT: xvmaxdp vs3, v3, v7
-; PWR10BE-NEXT: xvmaxdp vs2, vs3, vs2
-; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0
-; PWR10BE-NEXT: xvmaxdp vs0, vs0, vs2
+; PWR10BE-NEXT: xvcmpgtdp v0, v2, v6
+; PWR10BE-NEXT: xvcmpgtdp v1, v4, v8
+; PWR10BE-NEXT: xvcmpgtdp v10, v3, v7
+; PWR10BE-NEXT: xvcmpgtdp v11, v5, v9
+; PWR10BE-NEXT: xxsel vs0, v9, v5, v11
+; PWR10BE-NEXT: xxsel vs1, v7, v3, v10
+; PWR10BE-NEXT: xxsel vs2, v8, v4, v1
+; PWR10BE-NEXT: xxsel vs3, v6, v2, v0
+; PWR10BE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR10BE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs0, vs1, v3
+; PWR10BE-NEXT: xxsel vs1, vs2, vs3, v2
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR10BE-NEXT: xxswapd vs1, vs0
-; PWR10BE-NEXT: xvmaxdp vs1, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
@@ -1052,107 +1156,171 @@ entry:
define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f64_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: lxv vs0, 256(r1)
-; PWR9LE-NEXT: lxv vs1, 224(r1)
-; PWR9LE-NEXT: lxv vs2, 272(r1)
-; PWR9LE-NEXT: lxv vs3, 240(r1)
-; PWR9LE-NEXT: xvmaxdp vs4, v3, v11
-; PWR9LE-NEXT: xvmaxdp vs5, v5, v13
-; PWR9LE-NEXT: xvmaxdp vs6, v2, v10
-; PWR9LE-NEXT: xvmaxdp vs7, v4, v12
-; PWR9LE-NEXT: xvmaxdp vs3, v7, vs3
-; PWR9LE-NEXT: xvmaxdp vs2, v9, vs2
-; PWR9LE-NEXT: xvmaxdp vs1, v6, vs1
-; PWR9LE-NEXT: xvmaxdp vs0, v8, vs0
-; PWR9LE-NEXT: xvmaxdp vs0, vs7, vs0
-; PWR9LE-NEXT: xvmaxdp vs1, vs6, vs1
-; PWR9LE-NEXT: xvmaxdp vs2, vs5, vs2
-; PWR9LE-NEXT: xvmaxdp vs3, vs4, vs3
-; PWR9LE-NEXT: xvmaxdp vs2, vs3, vs2
-; PWR9LE-NEXT: xvmaxdp vs0, vs1, vs0
-; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs2
+; PWR9LE-NEXT: lxv vs0, 272(r1)
+; PWR9LE-NEXT: lxv vs1, 240(r1)
+; PWR9LE-NEXT: lxv vs2, 256(r1)
+; PWR9LE-NEXT: lxv vs3, 224(r1)
+; PWR9LE-NEXT: xvcmpgtdp v0, v2, v10
+; PWR9LE-NEXT: xvcmpgtdp v14, v4, v12
+; PWR9LE-NEXT: xvcmpgtdp v16, v3, v11
+; PWR9LE-NEXT: xvcmpgtdp v18, v5, v13
+; PWR9LE-NEXT: xxsel vs4, v13, v5, v18
+; PWR9LE-NEXT: xxsel vs5, v11, v3, v16
+; PWR9LE-NEXT: xxsel vs6, v12, v4, v14
+; PWR9LE-NEXT: xxsel vs7, v10, v2, v0
+; PWR9LE-NEXT: xvcmpgtdp v1, v6, vs3
+; PWR9LE-NEXT: xvcmpgtdp v15, v8, vs2
+; PWR9LE-NEXT: xvcmpgtdp v17, v7, vs1
+; PWR9LE-NEXT: xvcmpgtdp v19, v9, vs0
+; PWR9LE-NEXT: xxsel vs0, vs0, v9, v19
+; PWR9LE-NEXT: xxsel vs1, vs1, v7, v17
+; PWR9LE-NEXT: xxsel vs2, vs2, v8, v15
+; PWR9LE-NEXT: xxsel vs3, vs3, v6, v1
+; PWR9LE-NEXT: xvcmpgtdp v2, vs7, vs3
+; PWR9LE-NEXT: xvcmpgtdp v3, vs6, vs2
+; PWR9LE-NEXT: xvcmpgtdp v4, vs5, vs1
+; PWR9LE-NEXT: xvcmpgtdp v5, vs4, vs0
+; PWR9LE-NEXT: xxsel vs0, vs0, vs4, v5
+; PWR9LE-NEXT: xxsel vs1, vs1, vs5, v4
+; PWR9LE-NEXT: xxsel vs2, vs2, vs6, v3
+; PWR9LE-NEXT: xxsel vs3, vs3, vs7, v2
+; PWR9LE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR9LE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs0, vs1, v3
+; PWR9LE-NEXT: xxsel vs1, vs2, vs3, v2
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
-; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f64_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: lxv vs0, 272(r1)
-; PWR9BE-NEXT: lxv vs1, 240(r1)
-; PWR9BE-NEXT: lxv vs2, 288(r1)
-; PWR9BE-NEXT: lxv vs3, 256(r1)
-; PWR9BE-NEXT: xvmaxdp vs4, v3, v11
-; PWR9BE-NEXT: xvmaxdp vs5, v5, v13
-; PWR9BE-NEXT: xvmaxdp vs6, v2, v10
-; PWR9BE-NEXT: xvmaxdp vs7, v4, v12
-; PWR9BE-NEXT: xvmaxdp vs3, v7, vs3
-; PWR9BE-NEXT: xvmaxdp vs2, v9, vs2
-; PWR9BE-NEXT: xvmaxdp vs1, v6, vs1
-; PWR9BE-NEXT: xvmaxdp vs0, v8, vs0
-; PWR9BE-NEXT: xvmaxdp vs0, vs7, vs0
-; PWR9BE-NEXT: xvmaxdp vs1, vs6, vs1
-; PWR9BE-NEXT: xvmaxdp vs2, vs5, vs2
-; PWR9BE-NEXT: xvmaxdp vs3, vs4, vs3
-; PWR9BE-NEXT: xvmaxdp vs2, vs3, vs2
-; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0
-; PWR9BE-NEXT: xvmaxdp vs0, vs0, vs2
+; PWR9BE-NEXT: lxv vs0, 288(r1)
+; PWR9BE-NEXT: lxv vs1, 256(r1)
+; PWR9BE-NEXT: lxv vs2, 272(r1)
+; PWR9BE-NEXT: lxv vs3, 240(r1)
+; PWR9BE-NEXT: xvcmpgtdp v0, v2, v10
+; PWR9BE-NEXT: xvcmpgtdp v14, v4, v12
+; PWR9BE-NEXT: xvcmpgtdp v16, v3, v11
+; PWR9BE-NEXT: xvcmpgtdp v18, v5, v13
+; PWR9BE-NEXT: xxsel vs4, v13, v5, v18
+; PWR9BE-NEXT: xxsel vs5, v11, v3, v16
+; PWR9BE-NEXT: xxsel vs6, v12, v4, v14
+; PWR9BE-NEXT: xxsel vs7, v10, v2, v0
+; PWR9BE-NEXT: xvcmpgtdp v1, v6, vs3
+; PWR9BE-NEXT: xvcmpgtdp v15, v8, vs2
+; PWR9BE-NEXT: xvcmpgtdp v17, v7, vs1
+; PWR9BE-NEXT: xvcmpgtdp v19, v9, vs0
+; PWR9BE-NEXT: xxsel vs0, vs0, v9, v19
+; PWR9BE-NEXT: xxsel vs1, vs1, v7, v17
+; PWR9BE-NEXT: xxsel vs2, vs2, v8, v15
+; PWR9BE-NEXT: xxsel vs3, vs3, v6, v1
+; PWR9BE-NEXT: xvcmpgtdp v2, vs7, vs3
+; PWR9BE-NEXT: xvcmpgtdp v3, vs6, vs2
+; PWR9BE-NEXT: xvcmpgtdp v4, vs5, vs1
+; PWR9BE-NEXT: xvcmpgtdp v5, vs4, vs0
+; PWR9BE-NEXT: xxsel vs0, vs0, vs4, v5
+; PWR9BE-NEXT: xxsel vs1, vs1, vs5, v4
+; PWR9BE-NEXT: xxsel vs2, vs2, vs6, v3
+; PWR9BE-NEXT: xxsel vs3, vs3, vs7, v2
+; PWR9BE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR9BE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs0, vs1, v3
+; PWR9BE-NEXT: xxsel vs1, vs2, vs3, v2
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR9BE-NEXT: xxswapd vs1, vs0
-; PWR9BE-NEXT: xvmaxdp vs1, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f64_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: lxv vs0, 256(r1)
-; PWR10LE-NEXT: lxv vs1, 224(r1)
-; PWR10LE-NEXT: xvmaxdp vs4, v3, v11
-; PWR10LE-NEXT: xvmaxdp vs5, v5, v13
-; PWR10LE-NEXT: xvmaxdp vs6, v2, v10
-; PWR10LE-NEXT: xvmaxdp vs7, v4, v12
-; PWR10LE-NEXT: xvmaxdp vs1, v6, vs1
-; PWR10LE-NEXT: lxv vs2, 272(r1)
-; PWR10LE-NEXT: lxv vs3, 240(r1)
-; PWR10LE-NEXT: xvmaxdp vs3, v7, vs3
-; PWR10LE-NEXT: xvmaxdp vs2, v9, vs2
-; PWR10LE-NEXT: xvmaxdp vs0, v8, vs0
-; PWR10LE-NEXT: xvmaxdp vs0, vs7, vs0
-; PWR10LE-NEXT: xvmaxdp vs1, vs6, vs1
-; PWR10LE-NEXT: xvmaxdp vs2, vs5, vs2
-; PWR10LE-NEXT: xvmaxdp vs3, vs4, vs3
-; PWR10LE-NEXT: xvmaxdp vs2, vs3, vs2
-; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0
-; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs2
+; PWR10LE-NEXT: lxv vs0, 272(r1)
+; PWR10LE-NEXT: lxv vs1, 240(r1)
+; PWR10LE-NEXT: xvcmpgtdp v0, v2, v10
+; PWR10LE-NEXT: xvcmpgtdp v14, v4, v12
+; PWR10LE-NEXT: xvcmpgtdp v16, v3, v11
+; PWR10LE-NEXT: xvcmpgtdp v18, v5, v13
+; PWR10LE-NEXT: xxsel vs4, v13, v5, v18
+; PWR10LE-NEXT: lxv vs2, 256(r1)
+; PWR10LE-NEXT: lxv vs3, 224(r1)
+; PWR10LE-NEXT: xxsel vs5, v11, v3, v16
+; PWR10LE-NEXT: xxsel vs6, v12, v4, v14
+; PWR10LE-NEXT: xxsel vs7, v10, v2, v0
+; PWR10LE-NEXT: xvcmpgtdp v1, v6, vs3
+; PWR10LE-NEXT: xvcmpgtdp v15, v8, vs2
+; PWR10LE-NEXT: xvcmpgtdp v17, v7, vs1
+; PWR10LE-NEXT: xvcmpgtdp v19, v9, vs0
+; PWR10LE-NEXT: xxsel vs0, vs0, v9, v19
+; PWR10LE-NEXT: xxsel vs1, vs1, v7, v17
+; PWR10LE-NEXT: xxsel vs2, vs2, v8, v15
+; PWR10LE-NEXT: xxsel vs3, vs3, v6, v1
+; PWR10LE-NEXT: xvcmpgtdp v2, vs7, vs3
+; PWR10LE-NEXT: xvcmpgtdp v3, vs6, vs2
+; PWR10LE-NEXT: xvcmpgtdp v4, vs5, vs1
+; PWR10LE-NEXT: xvcmpgtdp v5, vs4, vs0
+; PWR10LE-NEXT: xxsel vs0, vs0, vs4, v5
+; PWR10LE-NEXT: xxsel vs1, vs1, vs5, v4
+; PWR10LE-NEXT: xxsel vs2, vs2, vs6, v3
+; PWR10LE-NEXT: xxsel vs3, vs3, vs7, v2
+; PWR10LE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR10LE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs0, vs1, v3
+; PWR10LE-NEXT: xxsel vs1, vs2, vs3, v2
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
-; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f64_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: lxv vs0, 272(r1)
-; PWR10BE-NEXT: lxv vs1, 240(r1)
-; PWR10BE-NEXT: xvmaxdp vs4, v3, v11
-; PWR10BE-NEXT: xvmaxdp vs5, v5, v13
-; PWR10BE-NEXT: xvmaxdp vs6, v2, v10
-; PWR10BE-NEXT: xvmaxdp vs7, v4, v12
-; PWR10BE-NEXT: xvmaxdp vs1, v6, vs1
-; PWR10BE-NEXT: lxv vs2, 288(r1)
-; PWR10BE-NEXT: lxv vs3, 256(r1)
-; PWR10BE-NEXT: xvmaxdp vs3, v7, vs3
-; PWR10BE-NEXT: xvmaxdp vs2, v9, vs2
-; PWR10BE-NEXT: xvmaxdp vs0, v8, vs0
-; PWR10BE-NEXT: xvmaxdp vs0, vs7, vs0
-; PWR10BE-NEXT: xvmaxdp vs1, vs6, vs1
-; PWR10BE-NEXT: xvmaxdp vs2, vs5, vs2
-; PWR10BE-NEXT: xvmaxdp vs3, vs4, vs3
-; PWR10BE-NEXT: xvmaxdp vs2, vs3, vs2
-; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0
-; PWR10BE-NEXT: xvmaxdp vs0, vs0, vs2
+; PWR10BE-NEXT: lxv vs0, 288(r1)
+; PWR10BE-NEXT: lxv vs1, 256(r1)
+; PWR10BE-NEXT: xvcmpgtdp v0, v2, v10
+; PWR10BE-NEXT: xvcmpgtdp v14, v4, v12
+; PWR10BE-NEXT: xvcmpgtdp v16, v3, v11
+; PWR10BE-NEXT: xvcmpgtdp v18, v5, v13
+; PWR10BE-NEXT: xxsel vs4, v13, v5, v18
+; PWR10BE-NEXT: lxv vs2, 272(r1)
+; PWR10BE-NEXT: lxv vs3, 240(r1)
+; PWR10BE-NEXT: xxsel vs5, v11, v3, v16
+; PWR10BE-NEXT: xxsel vs6, v12, v4, v14
+; PWR10BE-NEXT: xxsel vs7, v10, v2, v0
+; PWR10BE-NEXT: xvcmpgtdp v1, v6, vs3
+; PWR10BE-NEXT: xvcmpgtdp v15, v8, vs2
+; PWR10BE-NEXT: xvcmpgtdp v17, v7, vs1
+; PWR10BE-NEXT: xvcmpgtdp v19, v9, vs0
+; PWR10BE-NEXT: xxsel vs0, vs0, v9, v19
+; PWR10BE-NEXT: xxsel vs1, vs1, v7, v17
+; PWR10BE-NEXT: xxsel vs2, vs2, v8, v15
+; PWR10BE-NEXT: xxsel vs3, vs3, v6, v1
+; PWR10BE-NEXT: xvcmpgtdp v2, vs7, vs3
+; PWR10BE-NEXT: xvcmpgtdp v3, vs6, vs2
+; PWR10BE-NEXT: xvcmpgtdp v4, vs5, vs1
+; PWR10BE-NEXT: xvcmpgtdp v5, vs4, vs0
+; PWR10BE-NEXT: xxsel vs0, vs0, vs4, v5
+; PWR10BE-NEXT: xxsel vs1, vs1, vs5, v4
+; PWR10BE-NEXT: xxsel vs2, vs2, vs6, v3
+; PWR10BE-NEXT: xxsel vs3, vs3, vs7, v2
+; PWR10BE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR10BE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs0, vs1, v3
+; PWR10BE-NEXT: xxsel vs1, vs2, vs3, v2
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs0, vs1, v2
; PWR10BE-NEXT: xxswapd vs1, vs0
-; PWR10BE-NEXT: xvmaxdp vs1, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtdp v2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll
index e806a702cd62b..004e88029c445 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll
@@ -379,51 +379,71 @@ entry:
define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: xvminsp vs0, v3, v5
-; PWR9LE-NEXT: xvminsp vs1, v2, v4
-; PWR9LE-NEXT: xvminsp vs0, vs1, vs0
-; PWR9LE-NEXT: xxswapd v2, vs0
-; PWR9LE-NEXT: xvminsp vs0, vs0, v2
+; PWR9LE-NEXT: xvcmpgtsp vs0, v5, v3
+; PWR9LE-NEXT: xvcmpgtsp vs1, v4, v2
+; PWR9LE-NEXT: xxsel vs1, v4, v2, vs1
+; PWR9LE-NEXT: xxsel vs0, v5, v3, vs0
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9LE-NEXT: xxsel vs0, vs0, vs1, vs2
+; PWR9LE-NEXT: xxswapd vs1, vs0
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
-; PWR9LE-NEXT: xvminsp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: xvminsp vs0, v3, v5
-; PWR9BE-NEXT: xvminsp vs1, v2, v4
-; PWR9BE-NEXT: xvminsp vs0, vs1, vs0
-; PWR9BE-NEXT: xxswapd v2, vs0
-; PWR9BE-NEXT: xvminsp vs0, vs0, v2
+; PWR9BE-NEXT: xvcmpgtsp vs0, v5, v3
+; PWR9BE-NEXT: xvcmpgtsp vs1, v4, v2
+; PWR9BE-NEXT: xxsel vs1, v4, v2, vs1
+; PWR9BE-NEXT: xxsel vs0, v5, v3, vs0
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR9BE-NEXT: xxsel vs0, vs0, vs1, vs2
+; PWR9BE-NEXT: xxswapd vs1, vs0
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
-; PWR9BE-NEXT: xvminsp vs0, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: xvminsp vs0, v3, v5
-; PWR10LE-NEXT: xvminsp vs1, v2, v4
-; PWR10LE-NEXT: xvminsp vs0, vs1, vs0
-; PWR10LE-NEXT: xxswapd v2, vs0
-; PWR10LE-NEXT: xvminsp vs0, vs0, v2
+; PWR10LE-NEXT: xvcmpgtsp vs0, v5, v3
+; PWR10LE-NEXT: xvcmpgtsp vs1, v4, v2
+; PWR10LE-NEXT: xxsel vs1, v4, v2, vs1
+; PWR10LE-NEXT: xxsel vs0, v5, v3, vs0
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10LE-NEXT: xxsel vs0, vs0, vs1, vs2
+; PWR10LE-NEXT: xxswapd vs1, vs0
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
-; PWR10LE-NEXT: xvminsp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: xvminsp vs0, v3, v5
-; PWR10BE-NEXT: xvminsp vs1, v2, v4
-; PWR10BE-NEXT: xvminsp vs0, vs1, vs0
-; PWR10BE-NEXT: xxswapd v2, vs0
-; PWR10BE-NEXT: xvminsp vs0, vs0, v2
+; PWR10BE-NEXT: xvcmpgtsp vs0, v5, v3
+; PWR10BE-NEXT: xvcmpgtsp vs1, v4, v2
+; PWR10BE-NEXT: xxsel vs1, v4, v2, vs1
+; PWR10BE-NEXT: xxsel vs0, v5, v3, vs0
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs0, vs1
+; PWR10BE-NEXT: xxsel vs0, vs0, vs1, vs2
+; PWR10BE-NEXT: xxswapd vs1, vs0
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
-; PWR10BE-NEXT: xvminsp vs0, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
@@ -523,67 +543,103 @@ entry:
define dso_local float @v32f32_fast(<32 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f32_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: xvminsp vs0, v4, v8
-; PWR9LE-NEXT: xvminsp vs1, v2, v6
-; PWR9LE-NEXT: xvminsp vs2, v5, v9
-; PWR9LE-NEXT: xvminsp vs3, v3, v7
-; PWR9LE-NEXT: xvminsp vs2, vs3, vs2
-; PWR9LE-NEXT: xvminsp vs0, vs1, vs0
-; PWR9LE-NEXT: xvminsp vs0, vs0, vs2
-; PWR9LE-NEXT: xxswapd v2, vs0
-; PWR9LE-NEXT: xvminsp vs0, vs0, v2
+; PWR9LE-NEXT: xvcmpgtsp vs0, v9, v5
+; PWR9LE-NEXT: xvcmpgtsp vs1, v7, v3
+; PWR9LE-NEXT: xvcmpgtsp vs2, v8, v4
+; PWR9LE-NEXT: xvcmpgtsp vs3, v6, v2
+; PWR9LE-NEXT: xxsel vs3, v6, v2, vs3
+; PWR9LE-NEXT: xxsel vs2, v8, v4, vs2
+; PWR9LE-NEXT: xxsel vs1, v7, v3, vs1
+; PWR9LE-NEXT: xxsel vs0, v9, v5, vs0
+; PWR9LE-NEXT: xvcmpgtsp vs4, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtsp vs5, vs2, vs3
+; PWR9LE-NEXT: xxsel vs2, vs2, vs3, vs5
+; PWR9LE-NEXT: xxsel vs0, vs0, vs1, vs4
+; PWR9LE-NEXT: xvcmpgtsp vs1, vs0, vs2
+; PWR9LE-NEXT: xxsel vs0, vs0, vs2, vs1
+; PWR9LE-NEXT: xxswapd vs1, vs0
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
-; PWR9LE-NEXT: xvminsp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f32_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: xvminsp vs0, v4, v8
-; PWR9BE-NEXT: xvminsp vs1, v2, v6
-; PWR9BE-NEXT: xvminsp vs2, v5, v9
-; PWR9BE-NEXT: xvminsp vs3, v3, v7
-; PWR9BE-NEXT: xvminsp vs2, vs3, vs2
-; PWR9BE-NEXT: xvminsp vs0, vs1, vs0
-; PWR9BE-NEXT: xvminsp vs0, vs0, vs2
-; PWR9BE-NEXT: xxswapd v2, vs0
-; PWR9BE-NEXT: xvminsp vs0, vs0, v2
+; PWR9BE-NEXT: xvcmpgtsp vs0, v9, v5
+; PWR9BE-NEXT: xvcmpgtsp vs1, v7, v3
+; PWR9BE-NEXT: xvcmpgtsp vs2, v8, v4
+; PWR9BE-NEXT: xvcmpgtsp vs3, v6, v2
+; PWR9BE-NEXT: xxsel vs3, v6, v2, vs3
+; PWR9BE-NEXT: xxsel vs2, v8, v4, vs2
+; PWR9BE-NEXT: xxsel vs1, v7, v3, vs1
+; PWR9BE-NEXT: xxsel vs0, v9, v5, vs0
+; PWR9BE-NEXT: xvcmpgtsp vs4, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtsp vs5, vs2, vs3
+; PWR9BE-NEXT: xxsel vs2, vs2, vs3, vs5
+; PWR9BE-NEXT: xxsel vs0, vs0, vs1, vs4
+; PWR9BE-NEXT: xvcmpgtsp vs1, vs0, vs2
+; PWR9BE-NEXT: xxsel vs0, vs0, vs2, vs1
+; PWR9BE-NEXT: xxswapd vs1, vs0
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
-; PWR9BE-NEXT: xvminsp vs0, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f32_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: xvminsp vs0, v4, v8
-; PWR10LE-NEXT: xvminsp vs1, v2, v6
-; PWR10LE-NEXT: xvminsp vs2, v5, v9
-; PWR10LE-NEXT: xvminsp vs3, v3, v7
-; PWR10LE-NEXT: xvminsp vs2, vs3, vs2
-; PWR10LE-NEXT: xvminsp vs0, vs1, vs0
-; PWR10LE-NEXT: xvminsp vs0, vs0, vs2
-; PWR10LE-NEXT: xxswapd v2, vs0
-; PWR10LE-NEXT: xvminsp vs0, vs0, v2
+; PWR10LE-NEXT: xvcmpgtsp vs0, v9, v5
+; PWR10LE-NEXT: xvcmpgtsp vs1, v7, v3
+; PWR10LE-NEXT: xvcmpgtsp vs2, v8, v4
+; PWR10LE-NEXT: xvcmpgtsp vs3, v6, v2
+; PWR10LE-NEXT: xxsel vs3, v6, v2, vs3
+; PWR10LE-NEXT: xxsel vs2, v8, v4, vs2
+; PWR10LE-NEXT: xxsel vs1, v7, v3, vs1
+; PWR10LE-NEXT: xxsel vs0, v9, v5, vs0
+; PWR10LE-NEXT: xvcmpgtsp vs4, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtsp vs5, vs2, vs3
+; PWR10LE-NEXT: xxsel vs2, vs2, vs3, vs5
+; PWR10LE-NEXT: xxsel vs0, vs0, vs1, vs4
+; PWR10LE-NEXT: xvcmpgtsp vs1, vs0, vs2
+; PWR10LE-NEXT: xxsel vs0, vs0, vs2, vs1
+; PWR10LE-NEXT: xxswapd vs1, vs0
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
-; PWR10LE-NEXT: xvminsp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f32_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: xvminsp vs0, v4, v8
-; PWR10BE-NEXT: xvminsp vs1, v2, v6
-; PWR10BE-NEXT: xvminsp vs2, v5, v9
-; PWR10BE-NEXT: xvminsp vs3, v3, v7
-; PWR10BE-NEXT: xvminsp vs2, vs3, vs2
-; PWR10BE-NEXT: xvminsp vs0, vs1, vs0
-; PWR10BE-NEXT: xvminsp vs0, vs0, vs2
-; PWR10BE-NEXT: xxswapd v2, vs0
-; PWR10BE-NEXT: xvminsp vs0, vs0, v2
+; PWR10BE-NEXT: xvcmpgtsp vs0, v9, v5
+; PWR10BE-NEXT: xvcmpgtsp vs1, v7, v3
+; PWR10BE-NEXT: xvcmpgtsp vs2, v8, v4
+; PWR10BE-NEXT: xvcmpgtsp vs3, v6, v2
+; PWR10BE-NEXT: xxsel vs3, v6, v2, vs3
+; PWR10BE-NEXT: xxsel vs2, v8, v4, vs2
+; PWR10BE-NEXT: xxsel vs1, v7, v3, vs1
+; PWR10BE-NEXT: xxsel vs0, v9, v5, vs0
+; PWR10BE-NEXT: xvcmpgtsp vs4, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtsp vs5, vs2, vs3
+; PWR10BE-NEXT: xxsel vs2, vs2, vs3, vs5
+; PWR10BE-NEXT: xxsel vs0, vs0, vs1, vs4
+; PWR10BE-NEXT: xvcmpgtsp vs1, vs0, vs2
+; PWR10BE-NEXT: xxsel vs0, vs0, vs2, vs1
+; PWR10BE-NEXT: xxswapd vs1, vs0
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
-; PWR10BE-NEXT: xvminsp vs0, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtsp vs2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, vs2
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
@@ -780,43 +836,59 @@ entry:
define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: xvmindp vs0, v3, v5
-; PWR9LE-NEXT: xvmindp vs1, v2, v4
-; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
+; PWR9LE-NEXT: xvcmpgtdp v0, v5, v3
+; PWR9LE-NEXT: xvcmpgtdp v1, v4, v2
+; PWR9LE-NEXT: xxsel vs0, v4, v2, v1
+; PWR9LE-NEXT: xxsel vs1, v5, v3, v0
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
-; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: xvmindp vs0, v3, v5
-; PWR9BE-NEXT: xvmindp vs1, v2, v4
-; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
+; PWR9BE-NEXT: xvcmpgtdp v0, v5, v3
+; PWR9BE-NEXT: xvcmpgtdp v1, v4, v2
+; PWR9BE-NEXT: xxsel vs0, v4, v2, v1
+; PWR9BE-NEXT: xxsel vs1, v5, v3, v0
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9BE-NEXT: xxswapd vs1, vs0
-; PWR9BE-NEXT: xvmindp vs1, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: xvmindp vs0, v3, v5
-; PWR10LE-NEXT: xvmindp vs1, v2, v4
-; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
+; PWR10LE-NEXT: xvcmpgtdp v0, v5, v3
+; PWR10LE-NEXT: xvcmpgtdp v1, v4, v2
+; PWR10LE-NEXT: xxsel vs0, v4, v2, v1
+; PWR10LE-NEXT: xxsel vs1, v5, v3, v0
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
-; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: xvmindp vs0, v3, v5
-; PWR10BE-NEXT: xvmindp vs1, v2, v4
-; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
+; PWR10BE-NEXT: xvcmpgtdp v0, v5, v3
+; PWR10BE-NEXT: xvcmpgtdp v1, v4, v2
+; PWR10BE-NEXT: xxsel vs0, v4, v2, v1
+; PWR10BE-NEXT: xxsel vs1, v5, v3, v0
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10BE-NEXT: xxswapd vs1, vs0
-; PWR10BE-NEXT: xvmindp vs1, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
@@ -884,59 +956,91 @@ entry:
define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: xvmindp vs0, v4, v8
-; PWR9LE-NEXT: xvmindp vs1, v2, v6
-; PWR9LE-NEXT: xvmindp vs2, v5, v9
-; PWR9LE-NEXT: xvmindp vs3, v3, v7
-; PWR9LE-NEXT: xvmindp vs2, vs3, vs2
-; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
-; PWR9LE-NEXT: xvmindp vs0, vs0, vs2
+; PWR9LE-NEXT: xvcmpgtdp v0, v9, v5
+; PWR9LE-NEXT: xvcmpgtdp v1, v7, v3
+; PWR9LE-NEXT: xvcmpgtdp v10, v8, v4
+; PWR9LE-NEXT: xvcmpgtdp v11, v6, v2
+; PWR9LE-NEXT: xxsel vs0, v6, v2, v11
+; PWR9LE-NEXT: xxsel vs1, v8, v4, v10
+; PWR9LE-NEXT: xxsel vs2, v7, v3, v1
+; PWR9LE-NEXT: xxsel vs3, v9, v5, v0
+; PWR9LE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR9LE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v3
+; PWR9LE-NEXT: xxsel vs1, vs3, vs2, v2
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
-; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: xvmindp vs0, v4, v8
-; PWR9BE-NEXT: xvmindp vs1, v2, v6
-; PWR9BE-NEXT: xvmindp vs2, v5, v9
-; PWR9BE-NEXT: xvmindp vs3, v3, v7
-; PWR9BE-NEXT: xvmindp vs2, vs3, vs2
-; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
-; PWR9BE-NEXT: xvmindp vs0, vs0, vs2
+; PWR9BE-NEXT: xvcmpgtdp v0, v9, v5
+; PWR9BE-NEXT: xvcmpgtdp v1, v7, v3
+; PWR9BE-NEXT: xvcmpgtdp v10, v8, v4
+; PWR9BE-NEXT: xvcmpgtdp v11, v6, v2
+; PWR9BE-NEXT: xxsel vs0, v6, v2, v11
+; PWR9BE-NEXT: xxsel vs1, v8, v4, v10
+; PWR9BE-NEXT: xxsel vs2, v7, v3, v1
+; PWR9BE-NEXT: xxsel vs3, v9, v5, v0
+; PWR9BE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR9BE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, v3
+; PWR9BE-NEXT: xxsel vs1, vs3, vs2, v2
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9BE-NEXT: xxswapd vs1, vs0
-; PWR9BE-NEXT: xvmindp vs1, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: xvmindp vs0, v4, v8
-; PWR10LE-NEXT: xvmindp vs1, v2, v6
-; PWR10LE-NEXT: xvmindp vs2, v5, v9
-; PWR10LE-NEXT: xvmindp vs3, v3, v7
-; PWR10LE-NEXT: xvmindp vs2, vs3, vs2
-; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
-; PWR10LE-NEXT: xvmindp vs0, vs0, vs2
+; PWR10LE-NEXT: xvcmpgtdp v0, v9, v5
+; PWR10LE-NEXT: xvcmpgtdp v1, v7, v3
+; PWR10LE-NEXT: xvcmpgtdp v10, v8, v4
+; PWR10LE-NEXT: xvcmpgtdp v11, v6, v2
+; PWR10LE-NEXT: xxsel vs0, v6, v2, v11
+; PWR10LE-NEXT: xxsel vs1, v8, v4, v10
+; PWR10LE-NEXT: xxsel vs2, v7, v3, v1
+; PWR10LE-NEXT: xxsel vs3, v9, v5, v0
+; PWR10LE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR10LE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v3
+; PWR10LE-NEXT: xxsel vs1, vs3, vs2, v2
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
-; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: xvmindp vs0, v4, v8
-; PWR10BE-NEXT: xvmindp vs1, v2, v6
-; PWR10BE-NEXT: xvmindp vs2, v5, v9
-; PWR10BE-NEXT: xvmindp vs3, v3, v7
-; PWR10BE-NEXT: xvmindp vs2, vs3, vs2
-; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
-; PWR10BE-NEXT: xvmindp vs0, vs0, vs2
+; PWR10BE-NEXT: xvcmpgtdp v0, v9, v5
+; PWR10BE-NEXT: xvcmpgtdp v1, v7, v3
+; PWR10BE-NEXT: xvcmpgtdp v10, v8, v4
+; PWR10BE-NEXT: xvcmpgtdp v11, v6, v2
+; PWR10BE-NEXT: xxsel vs0, v6, v2, v11
+; PWR10BE-NEXT: xxsel vs1, v8, v4, v10
+; PWR10BE-NEXT: xxsel vs2, v7, v3, v1
+; PWR10BE-NEXT: xxsel vs3, v9, v5, v0
+; PWR10BE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR10BE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, v3
+; PWR10BE-NEXT: xxsel vs1, vs3, vs2, v2
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10BE-NEXT: xxswapd vs1, vs0
-; PWR10BE-NEXT: xvmindp vs1, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
@@ -1052,107 +1156,171 @@ entry:
define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f64_fast:
; PWR9LE: # %bb.0: # %entry
-; PWR9LE-NEXT: lxv vs0, 256(r1)
-; PWR9LE-NEXT: lxv vs1, 224(r1)
-; PWR9LE-NEXT: lxv vs2, 272(r1)
-; PWR9LE-NEXT: lxv vs3, 240(r1)
-; PWR9LE-NEXT: xvmindp vs4, v3, v11
-; PWR9LE-NEXT: xvmindp vs5, v5, v13
-; PWR9LE-NEXT: xvmindp vs6, v2, v10
-; PWR9LE-NEXT: xvmindp vs7, v4, v12
-; PWR9LE-NEXT: xvmindp vs3, v7, vs3
-; PWR9LE-NEXT: xvmindp vs2, v9, vs2
-; PWR9LE-NEXT: xvmindp vs1, v6, vs1
-; PWR9LE-NEXT: xvmindp vs0, v8, vs0
-; PWR9LE-NEXT: xvmindp vs0, vs7, vs0
-; PWR9LE-NEXT: xvmindp vs1, vs6, vs1
-; PWR9LE-NEXT: xvmindp vs2, vs5, vs2
-; PWR9LE-NEXT: xvmindp vs3, vs4, vs3
-; PWR9LE-NEXT: xvmindp vs2, vs3, vs2
-; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
-; PWR9LE-NEXT: xvmindp vs0, vs0, vs2
+; PWR9LE-NEXT: lxv vs0, 224(r1)
+; PWR9LE-NEXT: lxv vs1, 256(r1)
+; PWR9LE-NEXT: lxv vs2, 240(r1)
+; PWR9LE-NEXT: lxv vs3, 272(r1)
+; PWR9LE-NEXT: xvcmpgtdp v1, v13, v5
+; PWR9LE-NEXT: xvcmpgtdp v15, v11, v3
+; PWR9LE-NEXT: xvcmpgtdp v17, v12, v4
+; PWR9LE-NEXT: xvcmpgtdp v19, v10, v2
+; PWR9LE-NEXT: xxsel vs4, v10, v2, v19
+; PWR9LE-NEXT: xxsel vs5, v12, v4, v17
+; PWR9LE-NEXT: xxsel vs6, v11, v3, v15
+; PWR9LE-NEXT: xxsel vs7, v13, v5, v1
+; PWR9LE-NEXT: xvcmpgtdp v0, vs3, v9
+; PWR9LE-NEXT: xvcmpgtdp v14, vs2, v7
+; PWR9LE-NEXT: xvcmpgtdp v16, vs1, v8
+; PWR9LE-NEXT: xvcmpgtdp v18, vs0, v6
+; PWR9LE-NEXT: xxsel vs0, vs0, v6, v18
+; PWR9LE-NEXT: xxsel vs1, vs1, v8, v16
+; PWR9LE-NEXT: xxsel vs2, vs2, v7, v14
+; PWR9LE-NEXT: xxsel vs3, vs3, v9, v0
+; PWR9LE-NEXT: xvcmpgtdp v2, vs3, vs7
+; PWR9LE-NEXT: xvcmpgtdp v3, vs2, vs6
+; PWR9LE-NEXT: xvcmpgtdp v4, vs1, vs5
+; PWR9LE-NEXT: xvcmpgtdp v5, vs0, vs4
+; PWR9LE-NEXT: xxsel vs0, vs0, vs4, v5
+; PWR9LE-NEXT: xxsel vs1, vs1, vs5, v4
+; PWR9LE-NEXT: xxsel vs2, vs2, vs6, v3
+; PWR9LE-NEXT: xxsel vs3, vs3, vs7, v2
+; PWR9LE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR9LE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v3
+; PWR9LE-NEXT: xxsel vs1, vs3, vs2, v2
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
-; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
+; PWR9LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f64_fast:
; PWR9BE: # %bb.0: # %entry
-; PWR9BE-NEXT: lxv vs0, 272(r1)
-; PWR9BE-NEXT: lxv vs1, 240(r1)
-; PWR9BE-NEXT: lxv vs2, 288(r1)
-; PWR9BE-NEXT: lxv vs3, 256(r1)
-; PWR9BE-NEXT: xvmindp vs4, v3, v11
-; PWR9BE-NEXT: xvmindp vs5, v5, v13
-; PWR9BE-NEXT: xvmindp vs6, v2, v10
-; PWR9BE-NEXT: xvmindp vs7, v4, v12
-; PWR9BE-NEXT: xvmindp vs3, v7, vs3
-; PWR9BE-NEXT: xvmindp vs2, v9, vs2
-; PWR9BE-NEXT: xvmindp vs1, v6, vs1
-; PWR9BE-NEXT: xvmindp vs0, v8, vs0
-; PWR9BE-NEXT: xvmindp vs0, vs7, vs0
-; PWR9BE-NEXT: xvmindp vs1, vs6, vs1
-; PWR9BE-NEXT: xvmindp vs2, vs5, vs2
-; PWR9BE-NEXT: xvmindp vs3, vs4, vs3
-; PWR9BE-NEXT: xvmindp vs2, vs3, vs2
-; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
-; PWR9BE-NEXT: xvmindp vs0, vs0, vs2
+; PWR9BE-NEXT: lxv vs0, 240(r1)
+; PWR9BE-NEXT: lxv vs1, 272(r1)
+; PWR9BE-NEXT: lxv vs2, 256(r1)
+; PWR9BE-NEXT: lxv vs3, 288(r1)
+; PWR9BE-NEXT: xvcmpgtdp v1, v13, v5
+; PWR9BE-NEXT: xvcmpgtdp v15, v11, v3
+; PWR9BE-NEXT: xvcmpgtdp v17, v12, v4
+; PWR9BE-NEXT: xvcmpgtdp v19, v10, v2
+; PWR9BE-NEXT: xxsel vs4, v10, v2, v19
+; PWR9BE-NEXT: xxsel vs5, v12, v4, v17
+; PWR9BE-NEXT: xxsel vs6, v11, v3, v15
+; PWR9BE-NEXT: xxsel vs7, v13, v5, v1
+; PWR9BE-NEXT: xvcmpgtdp v0, vs3, v9
+; PWR9BE-NEXT: xvcmpgtdp v14, vs2, v7
+; PWR9BE-NEXT: xvcmpgtdp v16, vs1, v8
+; PWR9BE-NEXT: xvcmpgtdp v18, vs0, v6
+; PWR9BE-NEXT: xxsel vs0, vs0, v6, v18
+; PWR9BE-NEXT: xxsel vs1, vs1, v8, v16
+; PWR9BE-NEXT: xxsel vs2, vs2, v7, v14
+; PWR9BE-NEXT: xxsel vs3, vs3, v9, v0
+; PWR9BE-NEXT: xvcmpgtdp v2, vs3, vs7
+; PWR9BE-NEXT: xvcmpgtdp v3, vs2, vs6
+; PWR9BE-NEXT: xvcmpgtdp v4, vs1, vs5
+; PWR9BE-NEXT: xvcmpgtdp v5, vs0, vs4
+; PWR9BE-NEXT: xxsel vs0, vs0, vs4, v5
+; PWR9BE-NEXT: xxsel vs1, vs1, vs5, v4
+; PWR9BE-NEXT: xxsel vs2, vs2, vs6, v3
+; PWR9BE-NEXT: xxsel vs3, vs3, vs7, v2
+; PWR9BE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR9BE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, v3
+; PWR9BE-NEXT: xxsel vs1, vs3, vs2, v2
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR9BE-NEXT: xxswapd vs1, vs0
-; PWR9BE-NEXT: xvmindp vs1, vs0, vs1
+; PWR9BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR9BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f64_fast:
; PWR10LE: # %bb.0: # %entry
-; PWR10LE-NEXT: lxv vs0, 256(r1)
-; PWR10LE-NEXT: lxv vs1, 224(r1)
-; PWR10LE-NEXT: xvmindp vs4, v3, v11
-; PWR10LE-NEXT: xvmindp vs5, v5, v13
-; PWR10LE-NEXT: xvmindp vs6, v2, v10
-; PWR10LE-NEXT: xvmindp vs7, v4, v12
-; PWR10LE-NEXT: xvmindp vs1, v6, vs1
-; PWR10LE-NEXT: lxv vs2, 272(r1)
-; PWR10LE-NEXT: lxv vs3, 240(r1)
-; PWR10LE-NEXT: xvmindp vs3, v7, vs3
-; PWR10LE-NEXT: xvmindp vs2, v9, vs2
-; PWR10LE-NEXT: xvmindp vs0, v8, vs0
-; PWR10LE-NEXT: xvmindp vs0, vs7, vs0
-; PWR10LE-NEXT: xvmindp vs1, vs6, vs1
-; PWR10LE-NEXT: xvmindp vs2, vs5, vs2
-; PWR10LE-NEXT: xvmindp vs3, vs4, vs3
-; PWR10LE-NEXT: xvmindp vs2, vs3, vs2
-; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
-; PWR10LE-NEXT: xvmindp vs0, vs0, vs2
+; PWR10LE-NEXT: lxv vs0, 224(r1)
+; PWR10LE-NEXT: lxv vs1, 256(r1)
+; PWR10LE-NEXT: xvcmpgtdp v1, v13, v5
+; PWR10LE-NEXT: xvcmpgtdp v15, v11, v3
+; PWR10LE-NEXT: xvcmpgtdp v17, v12, v4
+; PWR10LE-NEXT: xvcmpgtdp v19, v10, v2
+; PWR10LE-NEXT: xxsel vs4, v10, v2, v19
+; PWR10LE-NEXT: lxv vs2, 240(r1)
+; PWR10LE-NEXT: lxv vs3, 272(r1)
+; PWR10LE-NEXT: xxsel vs5, v12, v4, v17
+; PWR10LE-NEXT: xxsel vs6, v11, v3, v15
+; PWR10LE-NEXT: xxsel vs7, v13, v5, v1
+; PWR10LE-NEXT: xvcmpgtdp v0, vs3, v9
+; PWR10LE-NEXT: xvcmpgtdp v14, vs2, v7
+; PWR10LE-NEXT: xvcmpgtdp v16, vs1, v8
+; PWR10LE-NEXT: xvcmpgtdp v18, vs0, v6
+; PWR10LE-NEXT: xxsel vs0, vs0, v6, v18
+; PWR10LE-NEXT: xxsel vs1, vs1, v8, v16
+; PWR10LE-NEXT: xxsel vs2, vs2, v7, v14
+; PWR10LE-NEXT: xxsel vs3, vs3, v9, v0
+; PWR10LE-NEXT: xvcmpgtdp v2, vs3, vs7
+; PWR10LE-NEXT: xvcmpgtdp v3, vs2, vs6
+; PWR10LE-NEXT: xvcmpgtdp v4, vs1, vs5
+; PWR10LE-NEXT: xvcmpgtdp v5, vs0, vs4
+; PWR10LE-NEXT: xxsel vs0, vs0, vs4, v5
+; PWR10LE-NEXT: xxsel vs1, vs1, vs5, v4
+; PWR10LE-NEXT: xxsel vs2, vs2, vs6, v3
+; PWR10LE-NEXT: xxsel vs3, vs3, vs7, v2
+; PWR10LE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR10LE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v3
+; PWR10LE-NEXT: xxsel vs1, vs3, vs2, v2
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
-; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
+; PWR10LE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10LE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f64_fast:
; PWR10BE: # %bb.0: # %entry
-; PWR10BE-NEXT: lxv vs0, 272(r1)
-; PWR10BE-NEXT: lxv vs1, 240(r1)
-; PWR10BE-NEXT: xvmindp vs4, v3, v11
-; PWR10BE-NEXT: xvmindp vs5, v5, v13
-; PWR10BE-NEXT: xvmindp vs6, v2, v10
-; PWR10BE-NEXT: xvmindp vs7, v4, v12
-; PWR10BE-NEXT: xvmindp vs1, v6, vs1
-; PWR10BE-NEXT: lxv vs2, 288(r1)
-; PWR10BE-NEXT: lxv vs3, 256(r1)
-; PWR10BE-NEXT: xvmindp vs3, v7, vs3
-; PWR10BE-NEXT: xvmindp vs2, v9, vs2
-; PWR10BE-NEXT: xvmindp vs0, v8, vs0
-; PWR10BE-NEXT: xvmindp vs0, vs7, vs0
-; PWR10BE-NEXT: xvmindp vs1, vs6, vs1
-; PWR10BE-NEXT: xvmindp vs2, vs5, vs2
-; PWR10BE-NEXT: xvmindp vs3, vs4, vs3
-; PWR10BE-NEXT: xvmindp vs2, vs3, vs2
-; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
-; PWR10BE-NEXT: xvmindp vs0, vs0, vs2
+; PWR10BE-NEXT: lxv vs0, 240(r1)
+; PWR10BE-NEXT: lxv vs1, 272(r1)
+; PWR10BE-NEXT: xvcmpgtdp v1, v13, v5
+; PWR10BE-NEXT: xvcmpgtdp v15, v11, v3
+; PWR10BE-NEXT: xvcmpgtdp v17, v12, v4
+; PWR10BE-NEXT: xvcmpgtdp v19, v10, v2
+; PWR10BE-NEXT: xxsel vs4, v10, v2, v19
+; PWR10BE-NEXT: lxv vs2, 256(r1)
+; PWR10BE-NEXT: lxv vs3, 288(r1)
+; PWR10BE-NEXT: xxsel vs5, v12, v4, v17
+; PWR10BE-NEXT: xxsel vs6, v11, v3, v15
+; PWR10BE-NEXT: xxsel vs7, v13, v5, v1
+; PWR10BE-NEXT: xvcmpgtdp v0, vs3, v9
+; PWR10BE-NEXT: xvcmpgtdp v14, vs2, v7
+; PWR10BE-NEXT: xvcmpgtdp v16, vs1, v8
+; PWR10BE-NEXT: xvcmpgtdp v18, vs0, v6
+; PWR10BE-NEXT: xxsel vs0, vs0, v6, v18
+; PWR10BE-NEXT: xxsel vs1, vs1, v8, v16
+; PWR10BE-NEXT: xxsel vs2, vs2, v7, v14
+; PWR10BE-NEXT: xxsel vs3, vs3, v9, v0
+; PWR10BE-NEXT: xvcmpgtdp v2, vs3, vs7
+; PWR10BE-NEXT: xvcmpgtdp v3, vs2, vs6
+; PWR10BE-NEXT: xvcmpgtdp v4, vs1, vs5
+; PWR10BE-NEXT: xvcmpgtdp v5, vs0, vs4
+; PWR10BE-NEXT: xxsel vs0, vs0, vs4, v5
+; PWR10BE-NEXT: xxsel vs1, vs1, vs5, v4
+; PWR10BE-NEXT: xxsel vs2, vs2, vs6, v3
+; PWR10BE-NEXT: xxsel vs3, vs3, vs7, v2
+; PWR10BE-NEXT: xvcmpgtdp v2, vs3, vs2
+; PWR10BE-NEXT: xvcmpgtdp v3, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, v3
+; PWR10BE-NEXT: xxsel vs1, vs3, vs2, v2
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs0, vs1, vs0, v2
; PWR10BE-NEXT: xxswapd vs1, vs0
-; PWR10BE-NEXT: xvmindp vs1, vs0, vs1
+; PWR10BE-NEXT: xvcmpgtdp v2, vs1, vs0
+; PWR10BE-NEXT: xxsel vs1, vs1, vs0, v2
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
index 7bdf4e06029d2..d5bdb2c19e8f8 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Test vector maximum on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
@@ -21,8 +22,9 @@ declare fp128 @llvm.maximum.f128(fp128, fp128)
; Test the fmax library function.
define double @f1(double %dummy, double %val1, double %val2) {
; CHECK-LABEL: f1:
-; CHECK: wfmaxdb %f0, %f2, %f4, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmaxdb %f0, %f2, %f4, 4
+; CHECK-NEXT: br %r14
%ret = call double @fmax(double %val1, double %val2) readnone
ret double %ret
}
@@ -30,8 +32,9 @@ define double @f1(double %dummy, double %val1, double %val2) {
; Test the f64 maxnum intrinsic.
define double @f2(double %dummy, double %val1, double %val2) {
; CHECK-LABEL: f2:
-; CHECK: wfmaxdb %f0, %f2, %f4, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmaxdb %f0, %f2, %f4, 4
+; CHECK-NEXT: br %r14
%ret = call double @llvm.maxnum.f64(double %val1, double %val2)
ret double %ret
}
@@ -39,8 +42,9 @@ define double @f2(double %dummy, double %val1, double %val2) {
; Test the f64 maximum intrinsic.
define double @f3(double %dummy, double %val1, double %val2) {
; CHECK-LABEL: f3:
-; CHECK: wfmaxdb %f0, %f2, %f4, 1
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmaxdb %f0, %f2, %f4, 1
+; CHECK-NEXT: br %r14
%ret = call double @llvm.maximum.f64(double %val1, double %val2)
ret double %ret
}
@@ -48,9 +52,13 @@ define double @f3(double %dummy, double %val1, double %val2) {
; Test a f64 constant compare/select resulting in maxnum.
define double @f4(double %dummy, double %val) {
; CHECK-LABEL: f4:
-; CHECK: lzdr [[REG:%f[0-9]+]]
-; CHECK: wfmaxdb %f0, %f2, [[REG]], 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: ltdbr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB3_1:
+; CHECK-NEXT: lzdr %f0
+; CHECK-NEXT: br %r14
%cmp = fcmp ogt double %val, 0.0
%ret = select i1 %cmp, double %val, double 0.0
ret double %ret
@@ -59,9 +67,13 @@ define double @f4(double %dummy, double %val) {
; Test a f64 constant compare/select resulting in maximum.
define double @f5(double %dummy, double %val) {
; CHECK-LABEL: f5:
-; CHECK: ltdbr %f1, %f2
-; CHECK-NEXT: ldr %f0, %f2
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: ltdbr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: bnler %r14
+; CHECK-NEXT: .LBB4_1:
+; CHECK-NEXT: lzdr %f0
+; CHECK-NEXT: br %r14
%cmp = fcmp ugt double %val, 0.0
%ret = select i1 %cmp, double %val, double 0.0
ret double %ret
@@ -69,20 +81,22 @@ define double @f5(double %dummy, double %val) {
; Test the v2f64 maxnum intrinsic.
define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
- <2 x double> %val2) {
; CHECK-LABEL: f6:
-; CHECK: vfmaxdb %v24, %v26, %v28, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmaxdb %v24, %v26, %v28, 4
+; CHECK-NEXT: br %r14
+ <2 x double> %val2) {
%ret = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %val1, <2 x double> %val2)
ret <2 x double> %ret
}
; Test the v2f64 maximum intrinsic.
define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1,
- <2 x double> %val2) {
; CHECK-LABEL: f7:
-; CHECK: vfmaxdb %v24, %v26, %v28, 1
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmaxdb %v24, %v26, %v28, 1
+; CHECK-NEXT: br %r14
+ <2 x double> %val2) {
%ret = call <2 x double> @llvm.maximum.v2f64(<2 x double> %val1, <2 x double> %val2)
ret <2 x double> %ret
}
@@ -90,8 +104,9 @@ define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1,
; Test the fmaxf library function.
define float @f11(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f11:
-; CHECK: wfmaxsb %f0, %f2, %f4, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmaxsb %f0, %f2, %f4, 4
+; CHECK-NEXT: br %r14
%ret = call float @fmaxf(float %val1, float %val2) readnone
ret float %ret
}
@@ -99,8 +114,9 @@ define float @f11(float %dummy, float %val1, float %val2) {
; Test the f32 maxnum intrinsic.
define float @f12(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f12:
-; CHECK: wfmaxsb %f0, %f2, %f4, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmaxsb %f0, %f2, %f4, 4
+; CHECK-NEXT: br %r14
%ret = call float @llvm.maxnum.f32(float %val1, float %val2)
ret float %ret
}
@@ -108,8 +124,9 @@ define float @f12(float %dummy, float %val1, float %val2) {
; Test the f32 maximum intrinsic.
define float @f13(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f13:
-; CHECK: wfmaxsb %f0, %f2, %f4, 1
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmaxsb %f0, %f2, %f4, 1
+; CHECK-NEXT: br %r14
%ret = call float @llvm.maximum.f32(float %val1, float %val2)
ret float %ret
}
@@ -117,9 +134,13 @@ define float @f13(float %dummy, float %val1, float %val2) {
; Test a f32 constant compare/select resulting in maxnum.
define float @f14(float %dummy, float %val) {
; CHECK-LABEL: f14:
-; CHECK: lzer [[REG:%f[0-9]+]]
-; CHECK: wfmaxsb %f0, %f2, [[REG]], 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: ltebr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB10_1:
+; CHECK-NEXT: lzer %f0
+; CHECK-NEXT: br %r14
%cmp = fcmp ogt float %val, 0.0
%ret = select i1 %cmp, float %val, float 0.0
ret float %ret
@@ -128,9 +149,13 @@ define float @f14(float %dummy, float %val) {
; Test a f32 constant compare/select resulting in maximum.
define float @f15(float %dummy, float %val) {
; CHECK-LABEL: f15:
-; CHECK: ltebr %f1, %f2
-; CHECK: ldr %f0, %f2
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: ltebr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: bnler %r14
+; CHECK-NEXT: .LBB11_1:
+; CHECK-NEXT: lzer %f0
+; CHECK-NEXT: br %r14
%cmp = fcmp ugt float %val, 0.0
%ret = select i1 %cmp, float %val, float 0.0
ret float %ret
@@ -138,20 +163,22 @@ define float @f15(float %dummy, float %val) {
; Test the v4f32 maxnum intrinsic.
define <4 x float> @f16(<4 x float> %dummy, <4 x float> %val1,
- <4 x float> %val2) {
; CHECK-LABEL: f16:
-; CHECK: vfmaxsb %v24, %v26, %v28, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmaxsb %v24, %v26, %v28, 4
+; CHECK-NEXT: br %r14
+ <4 x float> %val2) {
%ret = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %val1, <4 x float> %val2)
ret <4 x float> %ret
}
; Test the v4f32 maximum intrinsic.
define <4 x float> @f17(<4 x float> %dummy, <4 x float> %val1,
- <4 x float> %val2) {
; CHECK-LABEL: f17:
-; CHECK: vfmaxsb %v24, %v26, %v28, 1
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmaxsb %v24, %v26, %v28, 1
+; CHECK-NEXT: br %r14
+ <4 x float> %val2) {
%ret = call <4 x float> @llvm.maximum.v4f32(<4 x float> %val1, <4 x float> %val2)
ret <4 x float> %ret
}
@@ -159,11 +186,12 @@ define <4 x float> @f17(<4 x float> %dummy, <4 x float> %val1,
; Test the fmaxl library function.
define void @f21(ptr %ptr1, ptr %ptr2, ptr %dst) {
; CHECK-LABEL: f21:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
-; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
-; CHECK: vst [[RES]], 0(%r4)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: wfmaxxb %v0, %v0, %v1, 4
+; CHECK-NEXT: vst %v0, 0(%r4), 3
+; CHECK-NEXT: br %r14
%val1 = load fp128, ptr %ptr1
%val2 = load fp128, ptr %ptr2
%res = call fp128 @fmaxl(fp128 %val1, fp128 %val2) readnone
@@ -174,11 +202,12 @@ define void @f21(ptr %ptr1, ptr %ptr2, ptr %dst) {
; Test the f128 maxnum intrinsic.
define void @f22(ptr %ptr1, ptr %ptr2, ptr %dst) {
; CHECK-LABEL: f22:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
-; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
-; CHECK: vst [[RES]], 0(%r4)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: wfmaxxb %v0, %v0, %v1, 4
+; CHECK-NEXT: vst %v0, 0(%r4), 3
+; CHECK-NEXT: br %r14
%val1 = load fp128, ptr %ptr1
%val2 = load fp128, ptr %ptr2
%res = call fp128 @llvm.maxnum.f128(fp128 %val1, fp128 %val2)
@@ -189,11 +218,12 @@ define void @f22(ptr %ptr1, ptr %ptr2, ptr %dst) {
; Test the f128 maximum intrinsic.
define void @f23(ptr %ptr1, ptr %ptr2, ptr %dst) {
; CHECK-LABEL: f23:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
-; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 1
-; CHECK: vst [[RES]], 0(%r4)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: wfmaxxb %v0, %v0, %v1, 1
+; CHECK-NEXT: vst %v0, 0(%r4), 3
+; CHECK-NEXT: br %r14
%val1 = load fp128, ptr %ptr1
%val2 = load fp128, ptr %ptr2
%res = call fp128 @llvm.maximum.f128(fp128 %val1, fp128 %val2)
@@ -204,11 +234,16 @@ define void @f23(ptr %ptr1, ptr %ptr2, ptr %dst) {
; Test a f128 constant compare/select resulting in maxnum.
define void @f24(ptr %ptr, ptr %dst) {
; CHECK-LABEL: f24:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vzero [[REG2:%v[0-9]+]]
-; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
-; CHECK: vst [[RES]], 0(%r3)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vzero %v1
+; CHECK-NEXT: wfcxb %v0, %v1
+; CHECK-NEXT: jh .LBB17_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vzero %v0
+; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
%cmp = fcmp ogt fp128 %val, 0xL00000000000000000000000000000000
%res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
@@ -219,11 +254,16 @@ define void @f24(ptr %ptr, ptr %dst) {
; Test a f128 constant compare/select resulting in maximum.
define void @f25(ptr %ptr, ptr %dst) {
; CHECK-LABEL: f25:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vzero [[REG2:%v[0-9]+]]
-; CHECK: wfcxb [[REG1]], [[REG2]]
-; CHECK: vst [[RES]], 0(%r3)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vzero %v1
+; CHECK-NEXT: wfcxb %v0, %v1
+; CHECK-NEXT: jnle .LBB18_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vzero %v0
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
%cmp = fcmp ugt fp128 %val, 0xL00000000000000000000000000000000
%res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
index e8d4b2828c84b..b4652a921b0ba 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll
@@ -1,12 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Test vector maximum/minimum with a zero splat on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
define <2 x double> @f1(<2 x double> %val) {
; CHECK-LABEL: f1:
-; CHECK: vgbm %v0, 0
-; CHECK-NEXT: vfmaxdb %v24, %v24, %v0, 4
-; CHECK-NEXT: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vfchdb %v1, %v24, %v0
+; CHECK-NEXT: vsel %v24, %v24, %v0, %v1
+; CHECK-NEXT: br %r14
%cmp = fcmp ogt <2 x double> %val, zeroinitializer
%ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
ret <2 x double> %ret
@@ -14,9 +17,11 @@ define <2 x double> @f1(<2 x double> %val) {
define <2 x double> @f2(<2 x double> %val) {
; CHECK-LABEL: f2:
-; CHECK: vgbm %v0, 0
-; CHECK-NEXT: vfmindb %v24, %v24, %v0, 4
-; CHECK-NEXT: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vfchdb %v1, %v0, %v24
+; CHECK-NEXT: vsel %v24, %v24, %v0, %v1
+; CHECK-NEXT: br %r14
%cmp = fcmp olt <2 x double> %val, zeroinitializer
%ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
ret <2 x double> %ret
@@ -24,9 +29,11 @@ define <2 x double> @f2(<2 x double> %val) {
define <4 x float> @f3(<4 x float> %val) {
; CHECK-LABEL: f3:
-; CHECK: vgbm %v0, 0
-; CHECK-NEXT: vfmaxsb %v24, %v24, %v0, 4
-; CHECK-NEXT: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vfchsb %v1, %v24, %v0
+; CHECK-NEXT: vsel %v24, %v24, %v0, %v1
+; CHECK-NEXT: br %r14
%cmp = fcmp ogt <4 x float> %val, zeroinitializer
%ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
ret <4 x float> %ret
@@ -34,9 +41,11 @@ define <4 x float> @f3(<4 x float> %val) {
define <4 x float> @f4(<4 x float> %val) {
; CHECK-LABEL: f4:
-; CHECK: vgbm %v0, 0
-; CHECK-NEXT: vfminsb %v24, %v24, %v0, 4
-; CHECK-NEXT: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vfchsb %v1, %v0, %v24
+; CHECK-NEXT: vsel %v24, %v24, %v0, %v1
+; CHECK-NEXT: br %r14
%cmp = fcmp olt <4 x float> %val, zeroinitializer
%ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
ret <4 x float> %ret
@@ -44,10 +53,11 @@ define <4 x float> @f4(<4 x float> %val) {
define <2 x double> @f5(<2 x double> %val) {
; CHECK-LABEL: f5:
-; CHECK: vgbm %v0, 0
-; CHECK-NEXT: vfchedb %v1, %v0, %v24
-; CHECK-NEXT: vsel %v24, %v0, %v24, %v1
-; CHECK-NEXT: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vfchedb %v1, %v0, %v24
+; CHECK-NEXT: vsel %v24, %v0, %v24, %v1
+; CHECK-NEXT: br %r14
%cmp = fcmp ugt <2 x double> %val, zeroinitializer
%ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
ret <2 x double> %ret
@@ -55,10 +65,11 @@ define <2 x double> @f5(<2 x double> %val) {
define <2 x double> @f6(<2 x double> %val) {
; CHECK-LABEL: f6:
-; CHECK: vgbm %v0, 0
-; CHECK-NEXT: vfchedb %v1, %v24, %v0
-; CHECK-NEXT: vsel %v24, %v0, %v24, %v1
-; CHECK-NEXT: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vfchedb %v1, %v24, %v0
+; CHECK-NEXT: vsel %v24, %v0, %v24, %v1
+; CHECK-NEXT: br %r14
%cmp = fcmp ult <2 x double> %val, zeroinitializer
%ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer
ret <2 x double> %ret
@@ -66,10 +77,11 @@ define <2 x double> @f6(<2 x double> %val) {
define <4 x float> @f7(<4 x float> %val) {
; CHECK-LABEL: f7:
-; CHECK: vgbm %v0, 0
-; CHECK-NEXT: vfchesb %v1, %v0, %v24
-; CHECK-NEXT: vsel %v24, %v0, %v24, %v1
-; CHECK-NEXT: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vfchesb %v1, %v0, %v24
+; CHECK-NEXT: vsel %v24, %v0, %v24, %v1
+; CHECK-NEXT: br %r14
%cmp = fcmp ugt <4 x float> %val, zeroinitializer
%ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
ret <4 x float> %ret
@@ -77,10 +89,11 @@ define <4 x float> @f7(<4 x float> %val) {
define <4 x float> @f8(<4 x float> %val) {
; CHECK-LABEL: f8:
-; CHECK: vgbm %v0, 0
-; CHECK-NEXT: vfchesb %v1, %v24, %v0
-; CHECK-NEXT: vsel %v24, %v0, %v24, %v1
-; CHECK-NEXT: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vfchesb %v1, %v24, %v0
+; CHECK-NEXT: vsel %v24, %v0, %v24, %v1
+; CHECK-NEXT: br %r14
%cmp = fcmp ult <4 x float> %val, zeroinitializer
%ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer
ret <4 x float> %ret
diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
index bf27eb3e56036..3489f5943ebbe 100644
--- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Test vector minimum on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
@@ -21,8 +22,9 @@ declare fp128 @llvm.minimum.f128(fp128, fp128)
; Test the fmin library function.
define double @f1(double %dummy, double %val1, double %val2) {
; CHECK-LABEL: f1:
-; CHECK: wfmindb %f0, %f2, %f4, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmindb %f0, %f2, %f4, 4
+; CHECK-NEXT: br %r14
%ret = call double @fmin(double %val1, double %val2) readnone
ret double %ret
}
@@ -30,8 +32,9 @@ define double @f1(double %dummy, double %val1, double %val2) {
; Test the f64 minnum intrinsic.
define double @f2(double %dummy, double %val1, double %val2) {
; CHECK-LABEL: f2:
-; CHECK: wfmindb %f0, %f2, %f4, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmindb %f0, %f2, %f4, 4
+; CHECK-NEXT: br %r14
%ret = call double @llvm.minnum.f64(double %val1, double %val2)
ret double %ret
}
@@ -39,8 +42,9 @@ define double @f2(double %dummy, double %val1, double %val2) {
; Test the f64 minimum intrinsic.
define double @f3(double %dummy, double %val1, double %val2) {
; CHECK-LABEL: f3:
-; CHECK: wfmindb %f0, %f2, %f4, 1
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfmindb %f0, %f2, %f4, 1
+; CHECK-NEXT: br %r14
%ret = call double @llvm.minimum.f64(double %val1, double %val2)
ret double %ret
}
@@ -48,9 +52,13 @@ define double @f3(double %dummy, double %val1, double %val2) {
; Test a f64 constant compare/select resulting in minnum.
define double @f4(double %dummy, double %val) {
; CHECK-LABEL: f4:
-; CHECK: lzdr [[REG:%f[0-9]+]]
-; CHECK: wfmindb %f0, %f2, [[REG]], 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: ltdbr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB3_1:
+; CHECK-NEXT: lzdr %f0
+; CHECK-NEXT: br %r14
%cmp = fcmp olt double %val, 0.0
%ret = select i1 %cmp, double %val, double 0.0
ret double %ret
@@ -59,30 +67,34 @@ define double @f4(double %dummy, double %val) {
; Test a f64 constant compare/select resulting in minimum.
define double @f5(double %dummy, double %val) {
; CHECK-LABEL: f5:
-; CHECK: ltdbr %f1, %f2
-; CHECK-NEXT: ldr %f0, %f2
-; CHECK: bnher %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: ltdbr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: bnher %r14
+; CHECK-NEXT: .LBB4_1:
+; CHECK-NEXT: lzdr %f0
+; CHECK-NEXT: br %r14
%cmp = fcmp ult double %val, 0.0
%ret = select i1 %cmp, double %val, double 0.0
ret double %ret
}
; Test the v2f64 minnum intrinsic.
-define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
- <2 x double> %val2) {
+define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
-; CHECK: vfmindb %v24, %v26, %v28, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmindb %v24, %v26, %v28, 4
+; CHECK-NEXT: br %r14
%ret = call <2 x double> @llvm.minnum.v2f64(<2 x double> %val1, <2 x double> %val2)
ret <2 x double> %ret
}
; Test the v2f64 minimum intrinsic.
-define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1,
- <2 x double> %val2) {
+define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f7:
-; CHECK: vfmindb %v24, %v26, %v28, 1
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmindb %v24, %v26, %v28, 1
+; CHECK-NEXT: br %r14
%ret = call <2 x double> @llvm.minimum.v2f64(<2 x double> %val1, <2 x double> %val2)
ret <2 x double> %ret
}
@@ -90,8 +102,9 @@ define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1,
; Test the fminf library function.
define float @f11(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f11:
-; CHECK: wfminsb %f0, %f2, %f4, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfminsb %f0, %f2, %f4, 4
+; CHECK-NEXT: br %r14
%ret = call float @fminf(float %val1, float %val2) readnone
ret float %ret
}
@@ -99,8 +112,9 @@ define float @f11(float %dummy, float %val1, float %val2) {
; Test the f32 minnum intrinsic.
define float @f12(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f12:
-; CHECK: wfminsb %f0, %f2, %f4, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfminsb %f0, %f2, %f4, 4
+; CHECK-NEXT: br %r14
%ret = call float @llvm.minnum.f32(float %val1, float %val2)
ret float %ret
}
@@ -108,8 +122,9 @@ define float @f12(float %dummy, float %val1, float %val2) {
; Test the f32 minimum intrinsic.
define float @f13(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f13:
-; CHECK: wfminsb %f0, %f2, %f4, 1
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: wfminsb %f0, %f2, %f4, 1
+; CHECK-NEXT: br %r14
%ret = call float @llvm.minimum.f32(float %val1, float %val2)
ret float %ret
}
@@ -117,9 +132,13 @@ define float @f13(float %dummy, float %val1, float %val2) {
; Test a f32 constant compare/select resulting in minnum.
define float @f14(float %dummy, float %val) {
; CHECK-LABEL: f14:
-; CHECK: lzer [[REG:%f[0-9]+]]
-; CHECK: wfminsb %f0, %f2, [[REG]], 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: ltebr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB10_1:
+; CHECK-NEXT: lzer %f0
+; CHECK-NEXT: br %r14
%cmp = fcmp olt float %val, 0.0
%ret = select i1 %cmp, float %val, float 0.0
ret float %ret
@@ -128,30 +147,35 @@ define float @f14(float %dummy, float %val) {
; Test a f32 constant compare/select resulting in minimum.
define float @f15(float %dummy, float %val) {
; CHECK-LABEL: f15:
-; CHECK: ltebr %f1, %f2
-; CHECK: ldr %f0, %f2
-; CHECK: bnher %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: ltebr %f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: bnher %r14
+; CHECK-NEXT: .LBB11_1:
+; CHECK-NEXT: lzer %f0
+; CHECK-NEXT: br %r14
%cmp = fcmp ult float %val, 0.0
%ret = select i1 %cmp, float %val, float 0.0
ret float %ret
}
; Test the v4f32 minnum intrinsic.
-define <4 x float> @f16(<4 x float> %dummy, <4 x float> %val1,
- <4 x float> %val2) {
+define <4 x float> @f16(<4 x float> %dummy, <4 x float> %val1, <4 x float> %val2) {
; CHECK-LABEL: f16:
-; CHECK: vfminsb %v24, %v26, %v28, 4
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfminsb %v24, %v26, %v28, 4
+; CHECK-NEXT: br %r14
%ret = call <4 x float> @llvm.minnum.v4f32(<4 x float> %val1, <4 x float> %val2)
ret <4 x float> %ret
}
; Test the v4f32 minimum intrinsic.
define <4 x float> @f17(<4 x float> %dummy, <4 x float> %val1,
- <4 x float> %val2) {
; CHECK-LABEL: f17:
-; CHECK: vfminsb %v24, %v26, %v28, 1
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfminsb %v24, %v26, %v28, 1
+; CHECK-NEXT: br %r14
+ <4 x float> %val2) {
%ret = call <4 x float> @llvm.minimum.v4f32(<4 x float> %val1, <4 x float> %val2)
ret <4 x float> %ret
}
@@ -159,11 +183,12 @@ define <4 x float> @f17(<4 x float> %dummy, <4 x float> %val1,
; Test the fminl library function.
define void @f21(ptr %ptr1, ptr %ptr2, ptr %dst) {
; CHECK-LABEL: f21:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
-; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
-; CHECK: vst [[RES]], 0(%r4)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: wfminxb %v0, %v0, %v1, 4
+; CHECK-NEXT: vst %v0, 0(%r4), 3
+; CHECK-NEXT: br %r14
%val1 = load fp128, ptr %ptr1
%val2 = load fp128, ptr %ptr2
%res = call fp128 @fminl(fp128 %val1, fp128 %val2) readnone
@@ -174,11 +199,12 @@ define void @f21(ptr %ptr1, ptr %ptr2, ptr %dst) {
; Test the f128 minnum intrinsic.
define void @f22(ptr %ptr1, ptr %ptr2, ptr %dst) {
; CHECK-LABEL: f22:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
-; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
-; CHECK: vst [[RES]], 0(%r4)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: wfminxb %v0, %v0, %v1, 4
+; CHECK-NEXT: vst %v0, 0(%r4), 3
+; CHECK-NEXT: br %r14
%val1 = load fp128, ptr %ptr1
%val2 = load fp128, ptr %ptr2
%res = call fp128 @llvm.minnum.f128(fp128 %val1, fp128 %val2)
@@ -189,11 +215,12 @@ define void @f22(ptr %ptr1, ptr %ptr2, ptr %dst) {
; Test the f128 minimum intrinsic.
define void @f23(ptr %ptr1, ptr %ptr2, ptr %dst) {
; CHECK-LABEL: f23:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
-; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 1
-; CHECK: vst [[RES]], 0(%r4)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: wfminxb %v0, %v0, %v1, 1
+; CHECK-NEXT: vst %v0, 0(%r4), 3
+; CHECK-NEXT: br %r14
%val1 = load fp128, ptr %ptr1
%val2 = load fp128, ptr %ptr2
%res = call fp128 @llvm.minimum.f128(fp128 %val1, fp128 %val2)
@@ -204,11 +231,16 @@ define void @f23(ptr %ptr1, ptr %ptr2, ptr %dst) {
; Test a f128 constant compare/select resulting in minnum.
define void @f24(ptr %ptr, ptr %dst) {
; CHECK-LABEL: f24:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vzero [[REG2:%v[0-9]+]]
-; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
-; CHECK: vst [[RES]], 0(%r3)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vzero %v1
+; CHECK-NEXT: wfcxb %v0, %v1
+; CHECK-NEXT: jl .LBB17_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vzero %v0
+; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
%cmp = fcmp olt fp128 %val, 0xL00000000000000000000000000000000
%res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
@@ -219,11 +251,16 @@ define void @f24(ptr %ptr, ptr %dst) {
; Test a f128 constant compare/select resulting in minimum.
define void @f25(ptr %ptr, ptr %dst) {
; CHECK-LABEL: f25:
-; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK-DAG: vzero [[REG2:%v[0-9]+]]
-; CHECK: wfcxb [[REG1]], [[REG2]]
-; CHECK: vst [[RES]], 0(%r3)
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vzero %v1
+; CHECK-NEXT: wfcxb %v0, %v1
+; CHECK-NEXT: jnhe .LBB18_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vzero %v0
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
%val = load fp128, ptr %ptr
%cmp = fcmp ult fp128 %val, 0xL00000000000000000000000000000000
%res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
index d536e6b72ac9c..226ef472b8d84 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
@@ -247,15 +247,32 @@ entry:
define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: maxnm_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vmaxnm.f32 s3, s7, s3
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s6, s2
-; CHECK-MVE-NEXT: vmaxnm.f32 s1, s5, s1
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s5, s1
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f32 s7, s3
+; CHECK-MVE-NEXT: cset r1, gt
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f32 s6, s2
+; CHECK-MVE-NEXT: cset r2, gt
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: cset r3, gt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s7
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s6
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s5
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s4
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: maxnm_float32_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmaxnm.f32 q0, q1, q0
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
%cmp = fcmp fast ogt <4 x float> %src2, %src1
@@ -268,29 +285,62 @@ define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half>
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s8, s0
; CHECK-MVE-NEXT: vmovx.f16 s10, s4
-; CHECK-MVE-NEXT: vminnm.f16 s0, s4, s0
-; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8
-; CHECK-MVE-NEXT: vins.f16 s0, s8
+; CHECK-MVE-NEXT: vcmp.f16 s10, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s4, s0
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s8, s10, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s0, s4, s0
; CHECK-MVE-NEXT: vmovx.f16 s4, s1
+; CHECK-MVE-NEXT: vins.f16 s0, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s5
-; CHECK-MVE-NEXT: vminnm.f16 s1, s5, s1
-; CHECK-MVE-NEXT: vminnm.f16 s4, s8, s4
+; CHECK-MVE-NEXT: vcmp.f16 s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s5, s1
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s1, s5, s1
; CHECK-MVE-NEXT: vins.f16 s1, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s2
-; CHECK-MVE-NEXT: vminnm.f16 s2, s6, s2
-; CHECK-MVE-NEXT: vminnm.f16 s4, s8, s4
+; CHECK-MVE-NEXT: vcmp.f16 s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s6, s2
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s2, s6, s2
+; CHECK-MVE-NEXT: vmovx.f16 s6, s7
; CHECK-MVE-NEXT: vins.f16 s2, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s3
-; CHECK-MVE-NEXT: vmovx.f16 s6, s7
-; CHECK-MVE-NEXT: vminnm.f16 s3, s7, s3
-; CHECK-MVE-NEXT: vminnm.f16 s4, s6, s4
+; CHECK-MVE-NEXT: vcmp.f16 s6, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s7, s3
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: cset r0, gt
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s3, s7, s3
; CHECK-MVE-NEXT: vins.f16 s3, s4
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: minnm_float16_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vminnm.f16 q0, q1, q0
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
+; CHECK-MVEFP-NEXT: vpsel q0, q0, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%cmp = fcmp fast ogt <8 x half> %src2, %src1
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
index eeb1d0d1e7dbc..c40bb789ed3a4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll
@@ -741,8 +741,10 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fcmp_fast_olt_v4f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vpt.f32 eq, q0, zr
-; CHECK-NEXT: vminnmt.f32 q0, q1, q2
+; CHECK-NEXT: vcmp.f32 gt, q2, q1
+; CHECK-NEXT: vpsel q1, q1, q2
+; CHECK-NEXT: vcmp.f32 eq, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c = fcmp oeq <4 x float> %z, zeroinitializer
@@ -755,8 +757,10 @@ entry:
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcmp_fast_olt_v8f16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vpt.f16 eq, q0, zr
-; CHECK-NEXT: vminnmt.f16 q0, q1, q2
+; CHECK-NEXT: vcmp.f16 gt, q2, q1
+; CHECK-NEXT: vpsel q1, q1, q2
+; CHECK-NEXT: vcmp.f16 eq, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c = fcmp oeq <8 x half> %z, zeroinitializer
@@ -769,8 +773,10 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vpt.f32 eq, q0, zr
-; CHECK-NEXT: vmaxnmt.f32 q0, q1, q2
+; CHECK-NEXT: vcmp.f32 gt, q1, q2
+; CHECK-NEXT: vpsel q1, q1, q2
+; CHECK-NEXT: vcmp.f32 eq, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c = fcmp oeq <4 x float> %z, zeroinitializer
@@ -783,8 +789,10 @@ entry:
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vpt.f16 eq, q0, zr
-; CHECK-NEXT: vmaxnmt.f16 q0, q1, q2
+; CHECK-NEXT: vcmp.f16 gt, q1, q2
+; CHECK-NEXT: vpsel q1, q1, q2
+; CHECK-NEXT: vcmp.f16 eq, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c = fcmp oeq <8 x half> %z, zeroinitializer
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
index de7af894bd4fb..91a868c392daf 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll
@@ -852,9 +852,11 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f32 gt, q1, q0
+; CHECK-NEXT: vpsel q1, q0, q1
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vminnmt.f32 q0, q0, q1
+; CHECK-NEXT: vmovt q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -867,9 +869,11 @@ entry:
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f16 gt, q1, q0
+; CHECK-NEXT: vpsel q1, q0, q1
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vminnmt.f16 q0, q0, q1
+; CHECK-NEXT: vmovt q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -882,9 +886,11 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f32 gt, q0, q1
+; CHECK-NEXT: vpsel q1, q0, q1
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vmaxnmt.f32 q0, q0, q1
+; CHECK-NEXT: vmovt q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -897,9 +903,11 @@ entry:
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f16 gt, q0, q1
+; CHECK-NEXT: vpsel q1, q0, q1
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vmaxnmt.f16 q0, q0, q1
+; CHECK-NEXT: vmovt q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -2427,9 +2435,11 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_y:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f32 gt, q1, q0
+; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vminnmt.f32 q1, q0, q1
+; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -2443,9 +2453,11 @@ entry:
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_y:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f16 gt, q1, q0
+; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vminnmt.f16 q1, q0, q1
+; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -2459,9 +2471,11 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_y:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f32 gt, q0, q1
+; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vmaxnmt.f32 q1, q0, q1
+; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -2475,9 +2489,11 @@ entry:
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_y:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f16 gt, q0, q1
+; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vmaxnmt.f16 q1, q0, q1
+; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
index 080c6c1a1efdc..5c327ffad52f2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll
@@ -906,9 +906,11 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f32 gt, q1, q0
+; CHECK-NEXT: vpsel q1, q0, q1
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vminnmt.f32 q0, q0, q1
+; CHECK-NEXT: vmovt q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -921,9 +923,11 @@ entry:
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f16 gt, q1, q0
+; CHECK-NEXT: vpsel q1, q0, q1
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vminnmt.f16 q0, q0, q1
+; CHECK-NEXT: vmovt q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -936,9 +940,11 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f32 gt, q0, q1
+; CHECK-NEXT: vpsel q1, q0, q1
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vmaxnmt.f32 q0, q0, q1
+; CHECK-NEXT: vmovt q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -951,9 +957,11 @@ entry:
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f16 gt, q0, q1
+; CHECK-NEXT: vpsel q1, q0, q1
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vmaxnmt.f16 q0, q0, q1
+; CHECK-NEXT: vmovt q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
index 7cafb7262f460..61da12a667b64 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -391,7 +391,9 @@ define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) {
; CHECK-LABEL: fmin_v2f32_acc:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vminnm.f32 s0, s0, s1
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
@@ -406,7 +408,9 @@ define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
-; CHECK-FP-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-FP-NEXT: vcmp.f32 s0, s4
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32_acc:
@@ -414,7 +418,9 @@ define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
-; CHECK-NOFP-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-NOFP-NEXT: vcmp.f32 s0, s4
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
@@ -430,7 +436,9 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
-; CHECK-FP-NEXT: vminnm.f32 s0, s8, s0
+; CHECK-FP-NEXT: vcmp.f32 s0, s8
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32_acc:
@@ -450,7 +458,9 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
-; CHECK-NOFP-NEXT: vminnm.f32 s0, s8, s0
+; CHECK-NOFP-NEXT: vcmp.f32 s0, s8
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
@@ -467,7 +477,9 @@ define arm_aapcs_vfpcc half @fmin_v4f16_acc(<4 x half> %x, half %y) {
; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
; CHECK-FP-NEXT: vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0
+; CHECK-FP-NEXT: vcmp.f16 s0, s4
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16_acc:
@@ -477,7 +489,9 @@ define arm_aapcs_vfpcc half @fmin_v4f16_acc(<4 x half> %x, half %y) {
; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
+; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
@@ -491,7 +505,9 @@ define arm_aapcs_vfpcc half @fmin_v2f16_acc(<2 x half> %x, half %y) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovx.f16 s2, s0
; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vminnm.f16 s0, s4, s0
+; CHECK-NEXT: vcmp.f16 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s4, s0
; CHECK-NEXT: bx lr
entry:
%z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
@@ -508,7 +524,9 @@ define arm_aapcs_vfpcc half @fmin_v8f16_acc(<8 x half> %x, half %y) {
; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0
+; CHECK-FP-NEXT: vcmp.f16 s0, s4
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16_acc:
@@ -524,7 +542,9 @@ define arm_aapcs_vfpcc half @fmin_v8f16_acc(<8 x half> %x, half %y) {
; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
+; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
@@ -542,7 +562,9 @@ define arm_aapcs_vfpcc half @fmin_v16f16_acc(<16 x half> %x, half %y) {
; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-FP-NEXT: vminnm.f16 s0, s8, s0
+; CHECK-FP-NEXT: vcmp.f16 s0, s8
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f16 s0, s8, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16_acc:
@@ -586,7 +608,9 @@ define arm_aapcs_vfpcc half @fmin_v16f16_acc(<16 x half> %x, half %y) {
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0
+; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f16 s0, s8, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
@@ -598,7 +622,9 @@ entry:
define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
; CHECK-LABEL: fmin_v1f64_acc:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vminnm.f64 d0, d1, d0
+; CHECK-NEXT: vcmp.f64 d0, d1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d0, d1, d0
; CHECK-NEXT: bx lr
entry:
%z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
@@ -611,7 +637,9 @@ define arm_aapcs_vfpcc double @fmin_v2f64_acc(<2 x double> %x, double %y) {
; CHECK-LABEL: fmin_v2f64_acc:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vminnm.f64 d0, d0, d1
-; CHECK-NEXT: vminnm.f64 d0, d2, d0
+; CHECK-NEXT: vcmp.f64 d0, d2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d0, d2, d0
; CHECK-NEXT: bx lr
entry:
%z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
@@ -630,7 +658,9 @@ define arm_aapcs_vfpcc double @fmin_v4f64_acc(<4 x double> %x, double %y) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d0, d0, d2
; CHECK-NEXT: vminnm.f64 d0, d0, d1
-; CHECK-NEXT: vminnm.f64 d0, d4, d0
+; CHECK-NEXT: vcmp.f64 d0, d4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d0, d4, d0
; CHECK-NEXT: bx lr
entry:
%z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
@@ -1265,7 +1295,9 @@ define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) {
; CHECK-LABEL: fmax_v2f32_acc:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
@@ -1280,7 +1312,9 @@ define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) {
; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
-; CHECK-FP-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-FP-NEXT: vcmp.f32 s4, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v4f32_acc:
@@ -1288,7 +1322,9 @@ define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) {
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
-; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
@@ -1304,7 +1340,9 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
-; CHECK-FP-NEXT: vmaxnm.f32 s0, s8, s0
+; CHECK-FP-NEXT: vcmp.f32 s8, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v8f32_acc:
@@ -1324,7 +1362,9 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0
+; CHECK-NOFP-NEXT: vcmp.f32 s8, s0
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
@@ -1338,7 +1378,9 @@ define arm_aapcs_vfpcc half @fmax_v2f16_acc(<2 x half> %x, half %y) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovx.f16 s2, s0
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f16 s0, s4, s0
+; CHECK-NEXT: vcmp.f16 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s4, s0
; CHECK-NEXT: bx lr
entry:
%z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
@@ -1355,7 +1397,9 @@ define arm_aapcs_vfpcc half @fmax_v4f16_acc(<4 x half> %x, half %y) {
; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6
; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0
+; CHECK-FP-NEXT: vcmp.f16 s4, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v4f16_acc:
@@ -1365,7 +1409,9 @@ define arm_aapcs_vfpcc half @fmax_v4f16_acc(<4 x half> %x, half %y) {
; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
+; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
@@ -1382,7 +1428,9 @@ define arm_aapcs_vfpcc half @fmax_v8f16_acc(<8 x half> %x, half %y) {
; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0
+; CHECK-FP-NEXT: vcmp.f16 s4, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v8f16_acc:
@@ -1398,7 +1446,9 @@ define arm_aapcs_vfpcc half @fmax_v8f16_acc(<8 x half> %x, half %y) {
; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
+; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
@@ -1416,7 +1466,9 @@ define arm_aapcs_vfpcc half @fmax_v16f16_acc(<16 x half> %x, half %y) {
; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-FP-NEXT: vmaxnm.f16 s0, s8, s0
+; CHECK-FP-NEXT: vcmp.f16 s8, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vselgt.f16 s0, s8, s0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v16f16_acc:
@@ -1460,7 +1512,9 @@ define arm_aapcs_vfpcc half @fmax_v16f16_acc(<16 x half> %x, half %y) {
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0
+; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
+; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vselgt.f16 s0, s8, s0
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
@@ -1472,7 +1526,9 @@ entry:
define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) {
; CHECK-LABEL: fmax_v1f64_acc:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmaxnm.f64 d0, d1, d0
+; CHECK-NEXT: vcmp.f64 d1, d0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d0, d1, d0
; CHECK-NEXT: bx lr
entry:
%z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
@@ -1485,7 +1541,9 @@ define arm_aapcs_vfpcc double @fmax_v2f64_acc(<2 x double> %x, double %y) {
; CHECK-LABEL: fmax_v2f64_acc:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
-; CHECK-NEXT: vmaxnm.f64 d0, d2, d0
+; CHECK-NEXT: vcmp.f64 d2, d0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d0, d2, d0
; CHECK-NEXT: bx lr
entry:
%z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
@@ -1504,7 +1562,9 @@ define arm_aapcs_vfpcc double @fmax_v4f64_acc(<4 x double> %x, double %y) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d0, d0, d2
; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
-; CHECK-NEXT: vmaxnm.f64 d0, d4, d0
+; CHECK-NEXT: vcmp.f64 d4, d0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d0, d4, d0
; CHECK-NEXT: bx lr
entry:
%z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
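All of the scalar compare/select cases updated above reduce to the same IR shape; a minimal standalone sketch (hypothetical function name, mirroring the f4/f14-style tests in the SystemZ diffs above) of the pattern whose lowering changes is:

; Reduced example for illustration only, not taken verbatim from the patch.
; Without the SelectionDAGBuilder folding, this select is lowered as an
; explicit compare + select (e.g. SystemZ ltdbr + branch, MVE vcmp/vpsel)
; rather than a single fminnum/fmaxnum-style instruction, unless the
; target's own combines prove the transform safe.
define double @select_ogt_zero(double %val) {
  %cmp = fcmp ogt double %val, 0.0
  %ret = select i1 %cmp, double %val, double 0.0
  ret double %ret
}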