[llvm] 42605b8 - Revert "[SelectionDAG] Avoid one comparison when legalizing fmaximum (#142732)"
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 05:23:00 PDT 2025
Author: Nikita Popov
Date: 2025-06-04T14:22:19+02:00
New Revision: 42605b8aa31b82d8f3ba15bdca11ff3d52527a5e
URL: https://github.com/llvm/llvm-project/commit/42605b8aa31b82d8f3ba15bdca11ff3d52527a5e
DIFF: https://github.com/llvm/llvm-project/commit/42605b8aa31b82d8f3ba15bdca11ff3d52527a5e.diff
LOG: Revert "[SelectionDAG] Avoid one comparison when legalizing fmaximum (#142732)"
This reverts commit 54da543a14da6dd0e594875241494949cb659b08.
I made a logic error here: the change assumed that both values are known
to be +/-0.0 whenever the result compares equal to zero, but only one of
them needs to be a zero.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
llvm/test/CodeGen/ARM/fp-maximum-legalization.ll
llvm/test/CodeGen/NVPTX/bf16-instructions.ll
llvm/test/CodeGen/NVPTX/math-intrins.ll
llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll
llvm/test/CodeGen/X86/fminimum-fmaximum.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f34bf0ca7ede0..c8fe8971e593c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8610,16 +8610,19 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
// fminimum/fmaximum requires -0.0 less than +0.0
if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
!DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
+ auto IsSpecificZero = [&](SDValue F) {
+ FloatSignAsInt State;
+ DAG.getSignAsIntValue(State, DL, F);
+ return DAG.getSetCC(DL, CCVT, State.IntValue,
+ DAG.getConstant(0, DL, State.IntValue.getValueType()),
+ IsMax ? ISD::SETEQ : ISD::SETNE);
+ };
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
- FloatSignAsInt State;
- DAG.getSignAsIntValue(State, DL, LHS);
- SDValue IsSpecificZero =
- DAG.getSetCC(DL, CCVT, State.IntValue,
- DAG.getConstant(0, DL, State.IntValue.getValueType()),
- IsMax ? ISD::SETEQ : ISD::SETNE);
- SDValue Sel = DAG.getSelect(DL, VT, IsSpecificZero, LHS, RHS, Flags);
- MinMax = DAG.getSelect(DL, VT, IsZero, Sel, MinMax, Flags);
+ SDValue LCmp =
+ DAG.getSelect(DL, VT, IsSpecificZero(LHS), LHS, MinMax, Flags);
+ SDValue RCmp = DAG.getSelect(DL, VT, IsSpecificZero(RHS), RHS, LCmp, Flags);
+ MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
}
return MinMax;
diff --git a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
index b47470fb78234..9f542abcb80f7 100644
--- a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
@@ -46,46 +46,51 @@ define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind {
; CHECK-LABEL: maximum_fp128:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #96
-; CHECK-NEXT: str q0, [sp, #64]
-; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: ldrb w8, [sp, #79]
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: b.ne .LBB1_2
+; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: stp q1, q0, [sp, #48]
+; CHECK-NEXT: bl __gttf2
+; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: b.le .LBB1_2
; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: mov v2.16b, v0.16b
+; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: bl __gttf2
-; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: b.le .LBB1_4
-; CHECK-NEXT: // %bb.3:
-; CHECK-NEXT: mov v2.16b, v0.16b
-; CHECK-NEXT: .LBB1_4:
-; CHECK-NEXT: str q2, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: bl __unordtf2
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.eq .LBB1_6
-; CHECK-NEXT: // %bb.5:
+; CHECK-NEXT: b.eq .LBB1_4
+; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT: .LBB1_4:
+; CHECK-NEXT: ldrb w8, [sp, #79]
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: b.ne .LBB1_6
+; CHECK-NEXT: // %bb.5:
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .LBB1_6:
-; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: ldrb w8, [sp, #63]
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: b.ne .LBB1_8
+; CHECK-NEXT: // %bb.7:
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: .LBB1_8:
; CHECK-NEXT: adrp x8, .LCPI1_1
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_1]
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.ne .LBB1_8
-; CHECK-NEXT: // %bb.7:
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: .LBB1_8:
+; CHECK-NEXT: b.ne .LBB1_10
+; CHECK-NEXT: // %bb.9:
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: .LBB1_10:
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll b/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll
index f3e3e17a22eaf..a3ab144356e16 100644
--- a/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll
+++ b/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll
@@ -4,7 +4,7 @@
define double @maximum_double(double %x, double %y) nounwind {
; CHECK-LABEL: maximum_double:
; CHECK: @ %bb.0:
-; CHECK-NEXT: sub sp, sp, #8
+; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: vmov d16, r0, r1
@@ -12,26 +12,32 @@ define double @maximum_double(double %x, double %y) nounwind {
; CHECK-NEXT: vcmp.f64 d16, d17
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vldr d18, .LCPI0_0
+; CHECK-NEXT: vstr d16, [sp, #8]
+; CHECK-NEXT: vstr d17, [sp]
+; CHECK-NEXT: ldrb r1, [sp, #15]
; CHECK-NEXT: vmov.f64 d19, d17
-; CHECK-NEXT: vstr d16, [sp]
-; CHECK-NEXT: ldrb r1, [sp, #7]
; CHECK-NEXT: clz r1, r1
+; CHECK-NEXT: vldr d18, .LCPI0_0
; CHECK-NEXT: movwvs r2, #1
; CHECK-NEXT: movwgt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmovne.f64 d19, d16
; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: ldrb r2, [sp, #7]
; CHECK-NEXT: vmovne.f64 d19, d18
; CHECK-NEXT: lsrs r1, r1, #5
+; CHECK-NEXT: clz r1, r2
; CHECK-NEXT: vcmp.f64 d19, #0
-; CHECK-NEXT: vmovne.f64 d17, d16
+; CHECK-NEXT: vmov.f64 d18, d19
+; CHECK-NEXT: vmovne.f64 d18, d16
+; CHECK-NEXT: lsrs r1, r1, #5
+; CHECK-NEXT: vmovne.f64 d18, d17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: movweq r0, #1
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vmovne.f64 d19, d17
+; CHECK-NEXT: vmovne.f64 d19, d18
; CHECK-NEXT: vmov r0, r1, d19
-; CHECK-NEXT: add sp, sp, #8
+; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index e16ddb778099c..096649e5bde43 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -1351,28 +1351,30 @@ define bfloat @test_roundeven(bfloat %a) {
define bfloat @test_maximum(bfloat %a, bfloat %b) {
; SM70-LABEL: test_maximum(
; SM70: {
-; SM70-NEXT: .reg .pred %p<5>;
-; SM70-NEXT: .reg .b16 %rs<7>;
+; SM70-NEXT: .reg .pred %p<6>;
+; SM70-NEXT: .reg .b16 %rs<8>;
; SM70-NEXT: .reg .b32 %r<7>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %rs1, [test_maximum_param_0];
-; SM70-NEXT: setp.eq.s16 %p1, %rs1, 0;
; SM70-NEXT: ld.param.b16 %rs2, [test_maximum_param_1];
-; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; SM70-NEXT: cvt.u32.u16 %r1, %rs2;
; SM70-NEXT: shl.b32 %r2, %r1, 16;
; SM70-NEXT: cvt.u32.u16 %r3, %rs1;
; SM70-NEXT: shl.b32 %r4, %r3, 16;
-; SM70-NEXT: setp.gt.f32 %p2, %r4, %r2;
-; SM70-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
-; SM70-NEXT: setp.nan.f32 %p3, %r4, %r2;
-; SM70-NEXT: selp.b16 %rs5, 0x7FC0, %rs4, %p3;
-; SM70-NEXT: cvt.u32.u16 %r5, %rs5;
+; SM70-NEXT: setp.gt.f32 %p1, %r4, %r2;
+; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
+; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2;
+; SM70-NEXT: selp.b16 %rs4, 0x7FC0, %rs3, %p2;
+; SM70-NEXT: setp.eq.s16 %p3, %rs1, 0;
+; SM70-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
+; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; SM70-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
+; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
; SM70-NEXT: shl.b32 %r6, %r5, 16;
-; SM70-NEXT: setp.eq.f32 %p4, %r6, 0f00000000;
-; SM70-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs6;
+; SM70-NEXT: setp.eq.f32 %p5, %r6, 0f00000000;
+; SM70-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5;
+; SM70-NEXT: st.param.b16 [func_retval0], %rs7;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_maximum(
@@ -1473,44 +1475,48 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
; SM70-LABEL: test_maximum_v2(
; SM70: {
-; SM70-NEXT: .reg .pred %p<9>;
-; SM70-NEXT: .reg .b16 %rs<15>;
+; SM70-NEXT: .reg .pred %p<11>;
+; SM70-NEXT: .reg .b16 %rs<19>;
; SM70-NEXT: .reg .b32 %r<16>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_0];
; SM70-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_1];
; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r2;
-; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
-; SM70-NEXT: setp.eq.s16 %p1, %rs4, 0;
-; SM70-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
; SM70-NEXT: cvt.u32.u16 %r3, %rs2;
; SM70-NEXT: shl.b32 %r4, %r3, 16;
+; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
; SM70-NEXT: shl.b32 %r6, %r5, 16;
-; SM70-NEXT: setp.gt.f32 %p2, %r6, %r4;
-; SM70-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
-; SM70-NEXT: setp.nan.f32 %p3, %r6, %r4;
-; SM70-NEXT: selp.b16 %rs9, 0x7FC0, %rs8, %p3;
-; SM70-NEXT: cvt.u32.u16 %r7, %rs9;
+; SM70-NEXT: setp.gt.f32 %p1, %r6, %r4;
+; SM70-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
+; SM70-NEXT: setp.nan.f32 %p2, %r6, %r4;
+; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2;
+; SM70-NEXT: setp.eq.s16 %p3, %rs4, 0;
+; SM70-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
+; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; SM70-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
+; SM70-NEXT: cvt.u32.u16 %r7, %rs6;
; SM70-NEXT: shl.b32 %r8, %r7, 16;
-; SM70-NEXT: setp.eq.f32 %p4, %r8, 0f00000000;
-; SM70-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
-; SM70-NEXT: setp.eq.s16 %p5, %rs3, 0;
-; SM70-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
+; SM70-NEXT: setp.eq.f32 %p5, %r8, 0f00000000;
+; SM70-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
; SM70-NEXT: cvt.u32.u16 %r9, %rs1;
; SM70-NEXT: shl.b32 %r10, %r9, 16;
; SM70-NEXT: cvt.u32.u16 %r11, %rs3;
; SM70-NEXT: shl.b32 %r12, %r11, 16;
; SM70-NEXT: setp.gt.f32 %p6, %r12, %r10;
-; SM70-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6;
+; SM70-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6;
; SM70-NEXT: setp.nan.f32 %p7, %r12, %r10;
-; SM70-NEXT: selp.b16 %rs13, 0x7FC0, %rs12, %p7;
-; SM70-NEXT: cvt.u32.u16 %r13, %rs13;
+; SM70-NEXT: selp.b16 %rs15, 0x7FC0, %rs14, %p7;
+; SM70-NEXT: setp.eq.s16 %p8, %rs3, 0;
+; SM70-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
+; SM70-NEXT: setp.eq.s16 %p9, %rs1, 0;
+; SM70-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
+; SM70-NEXT: cvt.u32.u16 %r13, %rs15;
; SM70-NEXT: shl.b32 %r14, %r13, 16;
-; SM70-NEXT: setp.eq.f32 %p8, %r14, 0f00000000;
-; SM70-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8;
-; SM70-NEXT: mov.b32 %r15, {%rs14, %rs10};
+; SM70-NEXT: setp.eq.f32 %p10, %r14, 0f00000000;
+; SM70-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10;
+; SM70-NEXT: mov.b32 %r15, {%rs18, %rs13};
; SM70-NEXT: st.param.b32 [func_retval0], %r15;
; SM70-NEXT: ret;
;
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index d84d725bf72cf..441fdec7ce5c0 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -612,25 +612,27 @@ define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) {
define half @minimum_half(half %a, half %b) {
; CHECK-NOF16-LABEL: minimum_half(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<5>;
-; CHECK-NOF16-NEXT: .reg .b16 %rs<7>;
+; CHECK-NOF16-NEXT: .reg .pred %p<6>;
+; CHECK-NOF16-NEXT: .reg .b16 %rs<8>;
; CHECK-NOF16-NEXT: .reg .b32 %r<4>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [minimum_half_param_0];
-; CHECK-NOF16-NEXT: setp.ne.s16 %p1, %rs1, 0;
; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [minimum_half_param_1];
-; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs1;
-; CHECK-NOF16-NEXT: setp.lt.f32 %p2, %r2, %r1;
-; CHECK-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p3, %r2, %r1;
-; CHECK-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p3;
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs5;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r3, 0f00000000;
-; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4;
-; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs6;
+; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %r2, %r1;
+; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
+; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2;
+; CHECK-NOF16-NEXT: setp.ne.s16 %p3, %rs1, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
+; CHECK-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
+; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5;
+; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs7;
; CHECK-NOF16-NEXT: ret;
;
; CHECK-F16-LABEL: minimum_half(
@@ -646,25 +648,27 @@ define half @minimum_half(half %a, half %b) {
;
; CHECK-SM80-NOF16-LABEL: minimum_half(
; CHECK-SM80-NOF16: {
-; CHECK-SM80-NOF16-NEXT: .reg .pred %p<5>;
-; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>;
+; CHECK-SM80-NOF16-NEXT: .reg .pred %p<6>;
+; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<8>;
; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT: // %bb.0:
; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [minimum_half_param_0];
-; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p1, %rs1, 0;
; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [minimum_half_param_1];
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs1;
-; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p2, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
-; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p3, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs5;
-; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p4, %r3, 0f00000000;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4;
-; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs6;
+; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p1, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
+; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2;
+; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p3, %rs1, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
+; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
+; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
+; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5;
+; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs7;
; CHECK-SM80-NOF16-NEXT: ret;
%x = call half @llvm.minimum.f16(half %a, half %b)
ret half %x
@@ -673,20 +677,22 @@ define half @minimum_half(half %a, half %b) {
define float @minimum_float(float %a, float %b) {
; CHECK-NOF16-LABEL: minimum_float(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<4>;
-; CHECK-NOF16-NEXT: .reg .b32 %r<7>;
+; CHECK-NOF16-NEXT: .reg .pred %p<5>;
+; CHECK-NOF16-NEXT: .reg .b32 %r<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_float_param_0];
-; CHECK-NOF16-NEXT: setp.ne.s32 %p1, %r1, 0;
; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minimum_float_param_1];
-; CHECK-NOF16-NEXT: selp.f32 %r3, %r1, %r2, %p1;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r2;
-; CHECK-NOF16-NEXT: min.f32 %r4, %r1, %r2;
-; CHECK-NOF16-NEXT: selp.f32 %r5, 0f7FC00000, %r4, %p2;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r5, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r6, %r3, %r5, %p3;
-; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2;
+; CHECK-NOF16-NEXT: min.f32 %r3, %r1, %r2;
+; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1;
+; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2;
+; CHECK-NOF16-NEXT: setp.ne.s32 %p3, %r2, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4;
+; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NOF16-NEXT: ret;
;
; CHECK-F16-LABEL: minimum_float(
@@ -722,13 +728,13 @@ define float @minimum_imm1(float %a) {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_imm1_param_0];
-; CHECK-NOF16-NEXT: setp.ne.s32 %p1, %r1, 0;
-; CHECK-NOF16-NEXT: selp.f32 %r2, %r1, 0f00000000, %p1;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r1;
-; CHECK-NOF16-NEXT: min.f32 %r3, %r1, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p2;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r4, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r5, %r2, %r4, %p3;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3;
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NOF16-NEXT: ret;
;
@@ -763,13 +769,13 @@ define float @minimum_imm2(float %a) {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_imm2_param_0];
-; CHECK-NOF16-NEXT: setp.ne.s32 %p1, %r1, 0;
-; CHECK-NOF16-NEXT: selp.f32 %r2, %r1, 0f00000000, %p1;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r1;
-; CHECK-NOF16-NEXT: min.f32 %r3, %r1, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p2;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r4, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r5, %r2, %r4, %p3;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3;
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NOF16-NEXT: ret;
;
@@ -799,20 +805,22 @@ define float @minimum_imm2(float %a) {
define float @minimum_float_ftz(float %a, float %b) #1 {
; CHECK-NOF16-LABEL: minimum_float_ftz(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<4>;
-; CHECK-NOF16-NEXT: .reg .b32 %r<7>;
+; CHECK-NOF16-NEXT: .reg .pred %p<5>;
+; CHECK-NOF16-NEXT: .reg .b32 %r<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_float_ftz_param_0];
-; CHECK-NOF16-NEXT: setp.ne.s32 %p1, %r1, 0;
; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minimum_float_ftz_param_1];
-; CHECK-NOF16-NEXT: selp.f32 %r3, %r1, %r2, %p1;
-; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p2, %r1, %r2;
-; CHECK-NOF16-NEXT: min.ftz.f32 %r4, %r1, %r2;
-; CHECK-NOF16-NEXT: selp.f32 %r5, 0f7FC00000, %r4, %p2;
-; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p3, %r5, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r6, %r3, %r5, %p3;
-; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2;
+; CHECK-NOF16-NEXT: min.ftz.f32 %r3, %r1, %r2;
+; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1;
+; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2;
+; CHECK-NOF16-NEXT: setp.ne.s32 %p3, %r2, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3;
+; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4;
+; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NOF16-NEXT: ret;
;
; CHECK-F16-LABEL: minimum_float_ftz(
@@ -843,20 +851,22 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
define double @minimum_double(double %a, double %b) {
; CHECK-LABEL: minimum_double(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-NEXT: .reg .pred %p<5>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [minimum_double_param_0];
-; CHECK-NEXT: setp.ne.s64 %p1, %rd1, 0;
; CHECK-NEXT: ld.param.b64 %rd2, [minimum_double_param_1];
-; CHECK-NEXT: selp.f64 %rd3, %rd1, %rd2, %p1;
-; CHECK-NEXT: setp.nan.f64 %p2, %rd1, %rd2;
-; CHECK-NEXT: min.f64 %rd4, %rd1, %rd2;
-; CHECK-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p2;
-; CHECK-NEXT: setp.eq.f64 %p3, %rd5, 0d0000000000000000;
-; CHECK-NEXT: selp.f64 %rd6, %rd3, %rd5, %p3;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2;
+; CHECK-NEXT: min.f64 %rd3, %rd1, %rd2;
+; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
+; CHECK-NEXT: setp.ne.s64 %p2, %rd1, 0;
+; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2;
+; CHECK-NEXT: setp.ne.s64 %p3, %rd2, 0;
+; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3;
+; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000;
+; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
; CHECK-NEXT: ret;
%x = call double @llvm.minimum.f64(double %a, double %b)
ret double %x
@@ -865,38 +875,42 @@ define double @minimum_double(double %a, double %b) {
define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-LABEL: minimum_v2half(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<9>;
-; CHECK-NOF16-NEXT: .reg .b16 %rs<15>;
+; CHECK-NOF16-NEXT: .reg .pred %p<11>;
+; CHECK-NOF16-NEXT: .reg .b16 %rs<19>;
; CHECK-NOF16-NEXT: .reg .b32 %r<10>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_0];
; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_1];
; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2;
-; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1;
-; CHECK-NOF16-NEXT: setp.ne.s16 %p1, %rs4, 0;
-; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2;
+; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4;
-; CHECK-NOF16-NEXT: setp.lt.f32 %p2, %r4, %r3;
-; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p3, %r4, %r3;
-; CHECK-NOF16-NEXT: selp.b16 %rs9, 0x7E00, %rs8, %p3;
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs9;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r5, 0f00000000;
-; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
-; CHECK-NOF16-NEXT: setp.ne.s16 %p5, %rs3, 0;
-; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
+; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %r4, %r3;
+; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3;
+; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2;
+; CHECK-NOF16-NEXT: setp.ne.s16 %p3, %rs4, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
+; CHECK-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000;
+; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3;
; CHECK-NOF16-NEXT: setp.lt.f32 %p6, %r7, %r6;
-; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6;
+; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6;
; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6;
-; CHECK-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7;
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs13;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p8, %r8, 0f00000000;
-; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8;
-; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs10};
+; CHECK-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7;
+; CHECK-NOF16-NEXT: setp.ne.s16 %p8, %rs3, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
+; CHECK-NOF16-NEXT: setp.ne.s16 %p9, %rs1, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs15;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000;
+; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10;
+; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13};
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-NOF16-NEXT: ret;
;
@@ -913,38 +927,42 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
;
; CHECK-SM80-NOF16-LABEL: minimum_v2half(
; CHECK-SM80-NOF16: {
-; CHECK-SM80-NOF16-NEXT: .reg .pred %p<9>;
-; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>;
+; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>;
+; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<19>;
; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<10>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT: // %bb.0:
; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_0];
; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_1];
; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2;
-; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1;
-; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p1, %rs4, 0;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs2;
+; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs4;
-; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p2, %r4, %r3;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
-; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p3, %r4, %r3;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, 0x7E00, %rs8, %p3;
-; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs9;
-; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p4, %r5, 0f00000000;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
-; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p5, %rs3, 0;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
+; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p1, %r4, %r3;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
+; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2;
+; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p3, %rs4, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
+; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
+; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs6;
+; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs3;
; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p6, %r7, %r6;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6;
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7;
-; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs13;
-; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p8, %r8, 0f00000000;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8;
-; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs10};
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7;
+; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p8, %rs3, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
+; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p9, %rs1, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
+; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs15;
+; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10;
+; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13};
; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-SM80-NOF16-NEXT: ret;
%x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
@@ -1135,25 +1153,27 @@ define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) {
define half @maximum_half(half %a, half %b) {
; CHECK-NOF16-LABEL: maximum_half(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<5>;
-; CHECK-NOF16-NEXT: .reg .b16 %rs<7>;
+; CHECK-NOF16-NEXT: .reg .pred %p<6>;
+; CHECK-NOF16-NEXT: .reg .b16 %rs<8>;
; CHECK-NOF16-NEXT: .reg .b32 %r<4>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0];
-; CHECK-NOF16-NEXT: setp.eq.s16 %p1, %rs1, 0;
; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1];
-; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs1;
-; CHECK-NOF16-NEXT: setp.gt.f32 %p2, %r2, %r1;
-; CHECK-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p3, %r2, %r1;
-; CHECK-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p3;
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs5;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r3, 0f00000000;
-; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4;
-; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs6;
+; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r2, %r1;
+; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
+; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2;
+; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
+; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
+; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5;
+; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs7;
; CHECK-NOF16-NEXT: ret;
;
; CHECK-F16-LABEL: maximum_half(
@@ -1169,25 +1189,27 @@ define half @maximum_half(half %a, half %b) {
;
; CHECK-SM80-NOF16-LABEL: maximum_half(
; CHECK-SM80-NOF16: {
-; CHECK-SM80-NOF16-NEXT: .reg .pred %p<5>;
-; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>;
+; CHECK-SM80-NOF16-NEXT: .reg .pred %p<6>;
+; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<8>;
; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT: // %bb.0:
; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0];
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p1, %rs1, 0;
; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1];
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs1;
-; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p2, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
-; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p3, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs5;
-; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p4, %r3, 0f00000000;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4;
-; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs6;
+; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
+; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2;
+; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
+; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
+; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
+; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5;
+; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs7;
; CHECK-SM80-NOF16-NEXT: ret;
%x = call half @llvm.maximum.f16(half %a, half %b)
ret half %x
@@ -1196,19 +1218,17 @@ define half @maximum_half(half %a, half %b) {
define float @maximum_imm1(float %a) {
; CHECK-NOF16-LABEL: maximum_imm1(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<4>;
-; CHECK-NOF16-NEXT: .reg .b32 %r<6>;
+; CHECK-NOF16-NEXT: .reg .pred %p<3>;
+; CHECK-NOF16-NEXT: .reg .b32 %r<5>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm1_param_0];
-; CHECK-NOF16-NEXT: setp.eq.s32 %p1, %r1, 0;
-; CHECK-NOF16-NEXT: selp.f32 %r2, %r1, 0f00000000, %p1;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r1;
-; CHECK-NOF16-NEXT: max.f32 %r3, %r1, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p2;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r4, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r5, %r2, %r4, %p3;
-; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT: max.f32 %r2, %r1, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r3, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r4, 0f00000000, %r3, %p2;
+; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NOF16-NEXT: ret;
;
; CHECK-F16-LABEL: maximum_imm1(
@@ -1237,19 +1257,17 @@ define float @maximum_imm1(float %a) {
define float @maximum_imm2(float %a) {
; CHECK-NOF16-LABEL: maximum_imm2(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<4>;
-; CHECK-NOF16-NEXT: .reg .b32 %r<6>;
+; CHECK-NOF16-NEXT: .reg .pred %p<3>;
+; CHECK-NOF16-NEXT: .reg .b32 %r<5>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm2_param_0];
-; CHECK-NOF16-NEXT: setp.eq.s32 %p1, %r1, 0;
-; CHECK-NOF16-NEXT: selp.f32 %r2, %r1, 0f00000000, %p1;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r1;
-; CHECK-NOF16-NEXT: max.f32 %r3, %r1, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p2;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r4, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r5, %r2, %r4, %p3;
-; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
+; CHECK-NOF16-NEXT: max.f32 %r2, %r1, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r3, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r4, 0f00000000, %r3, %p2;
+; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NOF16-NEXT: ret;
;
; CHECK-F16-LABEL: maximum_imm2(
@@ -1278,20 +1296,22 @@ define float @maximum_imm2(float %a) {
define float @maximum_float(float %a, float %b) {
; CHECK-NOF16-LABEL: maximum_float(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<4>;
-; CHECK-NOF16-NEXT: .reg .b32 %r<7>;
+; CHECK-NOF16-NEXT: .reg .pred %p<5>;
+; CHECK-NOF16-NEXT: .reg .b32 %r<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_param_0];
-; CHECK-NOF16-NEXT: setp.eq.s32 %p1, %r1, 0;
; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_param_1];
-; CHECK-NOF16-NEXT: selp.f32 %r3, %r1, %r2, %p1;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r2;
-; CHECK-NOF16-NEXT: max.f32 %r4, %r1, %r2;
-; CHECK-NOF16-NEXT: selp.f32 %r5, 0f7FC00000, %r4, %p2;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r5, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r6, %r3, %r5, %p3;
-; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2;
+; CHECK-NOF16-NEXT: max.f32 %r3, %r1, %r2;
+; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1;
+; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2;
+; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4;
+; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NOF16-NEXT: ret;
;
; CHECK-F16-LABEL: maximum_float(
@@ -1322,20 +1342,22 @@ define float @maximum_float(float %a, float %b) {
define float @maximum_float_ftz(float %a, float %b) #1 {
; CHECK-NOF16-LABEL: maximum_float_ftz(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<4>;
-; CHECK-NOF16-NEXT: .reg .b32 %r<7>;
+; CHECK-NOF16-NEXT: .reg .pred %p<5>;
+; CHECK-NOF16-NEXT: .reg .b32 %r<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0];
-; CHECK-NOF16-NEXT: setp.eq.s32 %p1, %r1, 0;
; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1];
-; CHECK-NOF16-NEXT: selp.f32 %r3, %r1, %r2, %p1;
-; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p2, %r1, %r2;
-; CHECK-NOF16-NEXT: max.ftz.f32 %r4, %r1, %r2;
-; CHECK-NOF16-NEXT: selp.f32 %r5, 0f7FC00000, %r4, %p2;
-; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p3, %r5, 0f00000000;
-; CHECK-NOF16-NEXT: selp.f32 %r6, %r3, %r5, %p3;
-; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2;
+; CHECK-NOF16-NEXT: max.ftz.f32 %r3, %r1, %r2;
+; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1;
+; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2;
+; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0;
+; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3;
+; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000;
+; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4;
+; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NOF16-NEXT: ret;
;
; CHECK-F16-LABEL: maximum_float_ftz(
@@ -1366,20 +1388,22 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
define double @maximum_double(double %a, double %b) {
; CHECK-LABEL: maximum_double(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-NEXT: .reg .pred %p<5>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [maximum_double_param_0];
-; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0;
; CHECK-NEXT: ld.param.b64 %rd2, [maximum_double_param_1];
-; CHECK-NEXT: selp.f64 %rd3, %rd1, %rd2, %p1;
-; CHECK-NEXT: setp.nan.f64 %p2, %rd1, %rd2;
-; CHECK-NEXT: max.f64 %rd4, %rd1, %rd2;
-; CHECK-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p2;
-; CHECK-NEXT: setp.eq.f64 %p3, %rd5, 0d0000000000000000;
-; CHECK-NEXT: selp.f64 %rd6, %rd3, %rd5, %p3;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2;
+; CHECK-NEXT: max.f64 %rd3, %rd1, %rd2;
+; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
+; CHECK-NEXT: setp.eq.s64 %p2, %rd1, 0;
+; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2;
+; CHECK-NEXT: setp.eq.s64 %p3, %rd2, 0;
+; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3;
+; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000;
+; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
; CHECK-NEXT: ret;
%x = call double @llvm.maximum.f64(double %a, double %b)
ret double %x
@@ -1388,38 +1412,42 @@ define double @maximum_double(double %a, double %b) {
define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-LABEL: maximum_v2half(
; CHECK-NOF16: {
-; CHECK-NOF16-NEXT: .reg .pred %p<9>;
-; CHECK-NOF16-NEXT: .reg .b16 %rs<15>;
+; CHECK-NOF16-NEXT: .reg .pred %p<11>;
+; CHECK-NOF16-NEXT: .reg .b16 %rs<19>;
; CHECK-NOF16-NEXT: .reg .b32 %r<10>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0];
; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1];
; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2;
-; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p1, %rs4, 0;
-; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2;
+; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4;
-; CHECK-NOF16-NEXT: setp.gt.f32 %p2, %r4, %r3;
-; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
-; CHECK-NOF16-NEXT: setp.nan.f32 %p3, %r4, %r3;
-; CHECK-NOF16-NEXT: selp.b16 %rs9, 0x7E00, %rs8, %p3;
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs9;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r5, 0f00000000;
-; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p5, %rs3, 0;
-; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
+; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r4, %r3;
+; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
+; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3;
+; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2;
+; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
+; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000;
+; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3;
; CHECK-NOF16-NEXT: setp.gt.f32 %p6, %r7, %r6;
-; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6;
+; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6;
; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6;
-; CHECK-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7;
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs13;
-; CHECK-NOF16-NEXT: setp.eq.f32 %p8, %r8, 0f00000000;
-; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8;
-; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs10};
+; CHECK-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7;
+; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
+; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0;
+; CHECK-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs15;
+; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000;
+; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10;
+; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13};
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-NOF16-NEXT: ret;
;
@@ -1436,38 +1464,42 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
;
; CHECK-SM80-NOF16-LABEL: maximum_v2half(
; CHECK-SM80-NOF16: {
-; CHECK-SM80-NOF16-NEXT: .reg .pred %p<9>;
-; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>;
+; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>;
+; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<19>;
; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<10>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT: // %bb.0:
; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0];
; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1];
; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2;
-; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p1, %rs4, 0;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs2;
+; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs4;
-; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p2, %r4, %r3;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
-; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p3, %r4, %r3;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, 0x7E00, %rs8, %p3;
-; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs9;
-; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p4, %r5, 0f00000000;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p5, %rs3, 0;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
+; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %r4, %r3;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
+; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2;
+; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
+; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
+; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs6;
+; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs3;
; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p6, %r7, %r6;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6;
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7;
-; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs13;
-; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p8, %r8, 0f00000000;
-; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8;
-; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs10};
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7;
+; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
+; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
+; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs15;
+; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000;
+; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10;
+; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13};
; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-SM80-NOF16-NEXT: ret;
%x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
index 33a3155ab62bb..48107c8f63727 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
@@ -5,38 +5,41 @@ define fp128 @f128_minimum(fp128 %a, fp128 %b) {
; CHECK-LABEL: f128_minimum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stxv 34, -16(1)
-; CHECK-NEXT: xxlor 0, 34, 34
-; CHECK-NEXT: lbz 3, -1(1)
-; CHECK-NEXT: cmplwi 3, 0
-; CHECK-NEXT: beq 0, .LBB0_7
-; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: stxv 35, -32(1)
; CHECK-NEXT: xscmpuqp 0, 2, 3
-; CHECK-NEXT: bge 0, .LBB0_8
+; CHECK-NEXT: vmr 4, 2
+; CHECK-NEXT: blt 0, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: vmr 4, 3
; CHECK-NEXT: .LBB0_2: # %entry
; CHECK-NEXT: bnu 0, .LBB0_4
-; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
-; CHECK-NEXT: lxv 34, 0(3)
+; CHECK-NEXT: lxv 36, 0(3)
; CHECK-NEXT: .LBB0_4: # %entry
-; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
-; CHECK-NEXT: lxv 35, 0(3)
-; CHECK-NEXT: xscmpuqp 0, 2, 3
-; CHECK-NEXT: beq 0, .LBB0_6
+; CHECK-NEXT: lbz 3, -1(1)
+; CHECK-NEXT: cmplwi 3, 0
+; CHECK-NEXT: bne 0, .LBB0_6
; CHECK-NEXT: # %bb.5: # %entry
-; CHECK-NEXT: xxlor 0, 34, 34
+; CHECK-NEXT: vmr 2, 4
; CHECK-NEXT: .LBB0_6: # %entry
-; CHECK-NEXT: xxlor 34, 0, 0
-; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB0_7: # %entry
-; CHECK-NEXT: xxlor 0, 35, 35
-; CHECK-NEXT: xscmpuqp 0, 2, 3
-; CHECK-NEXT: blt 0, .LBB0_2
+; CHECK-NEXT: lbz 3, -17(1)
+; CHECK-NEXT: cmplwi 3, 0
+; CHECK-NEXT: bne 0, .LBB0_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: vmr 3, 2
; CHECK-NEXT: .LBB0_8: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NEXT: lxv 34, 0(3)
+; CHECK-NEXT: xscmpuqp 0, 4, 2
+; CHECK-NEXT: beq 0, .LBB0_10
+; CHECK-NEXT: # %bb.9: # %entry
+; CHECK-NEXT: vmr 3, 4
+; CHECK-NEXT: .LBB0_10: # %entry
; CHECK-NEXT: vmr 2, 3
-; CHECK-NEXT: bun 0, .LBB0_3
-; CHECK-NEXT: b .LBB0_4
+; CHECK-NEXT: blr
entry:
%m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b)
ret fp128 %m
@@ -46,38 +49,41 @@ define fp128 @f128_maximum(fp128 %a, fp128 %b) {
; CHECK-LABEL: f128_maximum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stxv 34, -16(1)
-; CHECK-NEXT: xxlor 0, 34, 34
-; CHECK-NEXT: lbz 3, -1(1)
-; CHECK-NEXT: cmplwi 3, 0
-; CHECK-NEXT: bne 0, .LBB1_7
-; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: stxv 35, -32(1)
; CHECK-NEXT: xscmpuqp 0, 2, 3
-; CHECK-NEXT: ble 0, .LBB1_8
+; CHECK-NEXT: vmr 4, 2
+; CHECK-NEXT: bgt 0, .LBB1_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: vmr 4, 3
; CHECK-NEXT: .LBB1_2: # %entry
; CHECK-NEXT: bnu 0, .LBB1_4
-; CHECK-NEXT: .LBB1_3:
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l
-; CHECK-NEXT: lxv 34, 0(3)
+; CHECK-NEXT: lxv 36, 0(3)
; CHECK-NEXT: .LBB1_4: # %entry
-; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l
-; CHECK-NEXT: lxv 35, 0(3)
-; CHECK-NEXT: xscmpuqp 0, 2, 3
+; CHECK-NEXT: lbz 3, -1(1)
+; CHECK-NEXT: cmplwi 3, 0
; CHECK-NEXT: beq 0, .LBB1_6
; CHECK-NEXT: # %bb.5: # %entry
-; CHECK-NEXT: xxlor 0, 34, 34
+; CHECK-NEXT: vmr 2, 4
; CHECK-NEXT: .LBB1_6: # %entry
-; CHECK-NEXT: xxlor 34, 0, 0
-; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB1_7: # %entry
-; CHECK-NEXT: xxlor 0, 35, 35
-; CHECK-NEXT: xscmpuqp 0, 2, 3
-; CHECK-NEXT: bgt 0, .LBB1_2
+; CHECK-NEXT: lbz 3, -17(1)
+; CHECK-NEXT: cmplwi 3, 0
+; CHECK-NEXT: beq 0, .LBB1_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: vmr 3, 2
; CHECK-NEXT: .LBB1_8: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l
+; CHECK-NEXT: lxv 34, 0(3)
+; CHECK-NEXT: xscmpuqp 0, 4, 2
+; CHECK-NEXT: beq 0, .LBB1_10
+; CHECK-NEXT: # %bb.9: # %entry
+; CHECK-NEXT: vmr 3, 4
+; CHECK-NEXT: .LBB1_10: # %entry
; CHECK-NEXT: vmr 2, 3
-; CHECK-NEXT: bun 0, .LBB1_3
-; CHECK-NEXT: b .LBB1_4
+; CHECK-NEXT: blr
entry:
%m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b)
ret fp128 %m
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
index 6bf84a4893b5f..e199a1eab49d5 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -6,37 +6,41 @@
define float @f32_minimum(float %a, float %b) {
; NOVSX-LABEL: f32_minimum:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: stfs 1, -4(1)
-; NOVSX-NEXT: fmr 0, 2
-; NOVSX-NEXT: lwz 3, -4(1)
-; NOVSX-NEXT: cmpwi 3, 0
-; NOVSX-NEXT: bc 4, 2, .LBB0_7
-; NOVSX-NEXT: # %bb.1: # %entry
; NOVSX-NEXT: fcmpu 0, 1, 2
-; NOVSX-NEXT: bc 4, 0, .LBB0_8
+; NOVSX-NEXT: fmr 3, 1
+; NOVSX-NEXT: stfs 1, -8(1)
+; NOVSX-NEXT: stfs 2, -4(1)
+; NOVSX-NEXT: bc 12, 0, .LBB0_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 3, 2
; NOVSX-NEXT: .LBB0_2: # %entry
; NOVSX-NEXT: bc 4, 3, .LBB0_4
-; NOVSX-NEXT: .LBB0_3:
+; NOVSX-NEXT: # %bb.3:
; NOVSX-NEXT: addis 3, 2, .LCPI0_0@toc@ha
-; NOVSX-NEXT: lfs 1, .LCPI0_0@toc@l(3)
+; NOVSX-NEXT: lfs 3, .LCPI0_0@toc@l(3)
; NOVSX-NEXT: .LBB0_4: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI0_1@toc@ha
-; NOVSX-NEXT: lfs 2, .LCPI0_1@toc@l(3)
-; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: lwz 3, -8(1)
+; NOVSX-NEXT: fmr 0, 3
+; NOVSX-NEXT: cmpwi 3, 0
; NOVSX-NEXT: bc 12, 2, .LBB0_6
; NOVSX-NEXT: # %bb.5: # %entry
; NOVSX-NEXT: fmr 0, 1
; NOVSX-NEXT: .LBB0_6: # %entry
+; NOVSX-NEXT: lwz 3, -4(1)
+; NOVSX-NEXT: cmpwi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB0_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: .LBB0_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI0_1@toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI0_1@toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 3, 1
+; NOVSX-NEXT: bc 12, 2, .LBB0_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 0, 3
+; NOVSX-NEXT: .LBB0_10: # %entry
; NOVSX-NEXT: fmr 1, 0
; NOVSX-NEXT: blr
-; NOVSX-NEXT: .LBB0_7: # %entry
-; NOVSX-NEXT: fmr 0, 1
-; NOVSX-NEXT: fcmpu 0, 1, 2
-; NOVSX-NEXT: bc 12, 0, .LBB0_2
-; NOVSX-NEXT: .LBB0_8: # %entry
-; NOVSX-NEXT: fmr 1, 2
-; NOVSX-NEXT: bc 12, 3, .LBB0_3
-; NOVSX-NEXT: b .LBB0_4
;
; VSX-LABEL: f32_minimum:
; VSX: # %bb.0: # %entry
@@ -69,37 +73,40 @@ entry:
define float @f32_maximum(float %a, float %b) {
; NOVSX-LABEL: f32_maximum:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: stfs 1, -4(1)
+; NOVSX-NEXT: fcmpu 0, 1, 2
; NOVSX-NEXT: fmr 0, 1
-; NOVSX-NEXT: lwz 3, -4(1)
-; NOVSX-NEXT: cmpwi 3, 0
-; NOVSX-NEXT: bc 4, 2, .LBB1_7
+; NOVSX-NEXT: stfs 1, -8(1)
+; NOVSX-NEXT: stfs 2, -4(1)
+; NOVSX-NEXT: bc 12, 1, .LBB1_2
; NOVSX-NEXT: # %bb.1: # %entry
-; NOVSX-NEXT: fcmpu 0, 1, 2
-; NOVSX-NEXT: bc 4, 1, .LBB1_8
+; NOVSX-NEXT: fmr 0, 2
; NOVSX-NEXT: .LBB1_2: # %entry
; NOVSX-NEXT: bc 4, 3, .LBB1_4
-; NOVSX-NEXT: .LBB1_3:
+; NOVSX-NEXT: # %bb.3:
; NOVSX-NEXT: addis 3, 2, .LCPI1_0@toc@ha
-; NOVSX-NEXT: lfs 1, .LCPI1_0@toc@l(3)
+; NOVSX-NEXT: lfs 0, .LCPI1_0@toc@l(3)
; NOVSX-NEXT: .LBB1_4: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI1_1@toc@ha
-; NOVSX-NEXT: lfs 2, .LCPI1_1@toc@l(3)
-; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: lwz 3, -8(1)
+; NOVSX-NEXT: cmpwi 3, 0
; NOVSX-NEXT: bc 12, 2, .LBB1_6
; NOVSX-NEXT: # %bb.5: # %entry
-; NOVSX-NEXT: fmr 0, 1
-; NOVSX-NEXT: .LBB1_6: # %entry
; NOVSX-NEXT: fmr 1, 0
-; NOVSX-NEXT: blr
-; NOVSX-NEXT: .LBB1_7: # %entry
-; NOVSX-NEXT: fmr 0, 2
-; NOVSX-NEXT: fcmpu 0, 1, 2
-; NOVSX-NEXT: bc 12, 1, .LBB1_2
+; NOVSX-NEXT: .LBB1_6: # %entry
+; NOVSX-NEXT: lwz 3, -4(1)
+; NOVSX-NEXT: cmpwi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB1_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 2, 1
; NOVSX-NEXT: .LBB1_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI1_1@toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI1_1@toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB1_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB1_10: # %entry
; NOVSX-NEXT: fmr 1, 2
-; NOVSX-NEXT: bc 12, 3, .LBB1_3
-; NOVSX-NEXT: b .LBB1_4
+; NOVSX-NEXT: blr
;
; VSX-LABEL: f32_maximum:
; VSX: # %bb.0: # %entry
@@ -132,37 +139,41 @@ entry:
define double @f64_minimum(double %a, double %b) {
; NOVSX-LABEL: f64_minimum:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: stfd 1, -8(1)
-; NOVSX-NEXT: fmr 0, 2
-; NOVSX-NEXT: ld 3, -8(1)
-; NOVSX-NEXT: cmpdi 3, 0
-; NOVSX-NEXT: bc 4, 2, .LBB2_7
-; NOVSX-NEXT: # %bb.1: # %entry
; NOVSX-NEXT: fcmpu 0, 1, 2
-; NOVSX-NEXT: bc 4, 0, .LBB2_8
+; NOVSX-NEXT: fmr 3, 1
+; NOVSX-NEXT: stfd 1, -16(1)
+; NOVSX-NEXT: stfd 2, -8(1)
+; NOVSX-NEXT: bc 12, 0, .LBB2_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 3, 2
; NOVSX-NEXT: .LBB2_2: # %entry
; NOVSX-NEXT: bc 4, 3, .LBB2_4
-; NOVSX-NEXT: .LBB2_3:
+; NOVSX-NEXT: # %bb.3:
; NOVSX-NEXT: addis 3, 2, .LCPI2_0@toc@ha
-; NOVSX-NEXT: lfs 1, .LCPI2_0@toc@l(3)
+; NOVSX-NEXT: lfs 3, .LCPI2_0@toc@l(3)
; NOVSX-NEXT: .LBB2_4: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI2_1@toc@ha
-; NOVSX-NEXT: lfs 2, .LCPI2_1@toc@l(3)
-; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: fmr 0, 3
+; NOVSX-NEXT: cmpdi 3, 0
; NOVSX-NEXT: bc 12, 2, .LBB2_6
; NOVSX-NEXT: # %bb.5: # %entry
; NOVSX-NEXT: fmr 0, 1
; NOVSX-NEXT: .LBB2_6: # %entry
+; NOVSX-NEXT: ld 3, -8(1)
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB2_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: .LBB2_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI2_1@toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI2_1@toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 3, 1
+; NOVSX-NEXT: bc 12, 2, .LBB2_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 0, 3
+; NOVSX-NEXT: .LBB2_10: # %entry
; NOVSX-NEXT: fmr 1, 0
; NOVSX-NEXT: blr
-; NOVSX-NEXT: .LBB2_7: # %entry
-; NOVSX-NEXT: fmr 0, 1
-; NOVSX-NEXT: fcmpu 0, 1, 2
-; NOVSX-NEXT: bc 12, 0, .LBB2_2
-; NOVSX-NEXT: .LBB2_8: # %entry
-; NOVSX-NEXT: fmr 1, 2
-; NOVSX-NEXT: bc 12, 3, .LBB2_3
-; NOVSX-NEXT: b .LBB2_4
;
; VSX-LABEL: f64_minimum:
; VSX: # %bb.0: # %entry
@@ -195,37 +206,40 @@ entry:
define double @f64_maximum(double %a, double %b) {
; NOVSX-LABEL: f64_maximum:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: stfd 1, -8(1)
+; NOVSX-NEXT: fcmpu 0, 1, 2
; NOVSX-NEXT: fmr 0, 1
-; NOVSX-NEXT: ld 3, -8(1)
-; NOVSX-NEXT: cmpdi 3, 0
-; NOVSX-NEXT: bc 4, 2, .LBB3_7
+; NOVSX-NEXT: stfd 1, -16(1)
+; NOVSX-NEXT: stfd 2, -8(1)
+; NOVSX-NEXT: bc 12, 1, .LBB3_2
; NOVSX-NEXT: # %bb.1: # %entry
-; NOVSX-NEXT: fcmpu 0, 1, 2
-; NOVSX-NEXT: bc 4, 1, .LBB3_8
+; NOVSX-NEXT: fmr 0, 2
; NOVSX-NEXT: .LBB3_2: # %entry
; NOVSX-NEXT: bc 4, 3, .LBB3_4
-; NOVSX-NEXT: .LBB3_3:
+; NOVSX-NEXT: # %bb.3:
; NOVSX-NEXT: addis 3, 2, .LCPI3_0@toc@ha
-; NOVSX-NEXT: lfs 1, .LCPI3_0@toc@l(3)
+; NOVSX-NEXT: lfs 0, .LCPI3_0@toc@l(3)
; NOVSX-NEXT: .LBB3_4: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI3_1@toc@ha
-; NOVSX-NEXT: lfs 2, .LCPI3_1@toc@l(3)
-; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: cmpdi 3, 0
; NOVSX-NEXT: bc 12, 2, .LBB3_6
; NOVSX-NEXT: # %bb.5: # %entry
-; NOVSX-NEXT: fmr 0, 1
-; NOVSX-NEXT: .LBB3_6: # %entry
; NOVSX-NEXT: fmr 1, 0
-; NOVSX-NEXT: blr
-; NOVSX-NEXT: .LBB3_7: # %entry
-; NOVSX-NEXT: fmr 0, 2
-; NOVSX-NEXT: fcmpu 0, 1, 2
-; NOVSX-NEXT: bc 12, 1, .LBB3_2
+; NOVSX-NEXT: .LBB3_6: # %entry
+; NOVSX-NEXT: ld 3, -8(1)
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB3_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 2, 1
; NOVSX-NEXT: .LBB3_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI3_1@toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI3_1@toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB3_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB3_10: # %entry
; NOVSX-NEXT: fmr 1, 2
-; NOVSX-NEXT: bc 12, 3, .LBB3_3
-; NOVSX-NEXT: b .LBB3_4
+; NOVSX-NEXT: blr
;
; VSX-LABEL: f64_maximum:
; VSX: # %bb.0: # %entry
@@ -272,7 +286,10 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
; NOVSX-NEXT: vxor 5, 5, 5
; NOVSX-NEXT: vcmpequw 0, 2, 5
; NOVSX-NEXT: vnot 0, 0
-; NOVSX-NEXT: vsel 2, 3, 2, 0
+; NOVSX-NEXT: vsel 2, 4, 2, 0
+; NOVSX-NEXT: vcmpequw 0, 3, 5
+; NOVSX-NEXT: vnot 0, 0
+; NOVSX-NEXT: vsel 2, 2, 3, 0
; NOVSX-NEXT: vcmpeqfp 3, 4, 5
; NOVSX-NEXT: vsel 2, 4, 2, 3
; NOVSX-NEXT: blr
@@ -292,8 +309,11 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
; VSX-NEXT: lxvd2x 2, 0, 3
; VSX-NEXT: xxsel 0, 0, 2, 1
; VSX-NEXT: xxlnor 1, 37, 37
+; VSX-NEXT: xxsel 1, 0, 34, 1
+; VSX-NEXT: vcmpequw 2, 3, 4
+; VSX-NEXT: xxlnor 2, 34, 34
+; VSX-NEXT: xxsel 1, 1, 35, 2
; VSX-NEXT: xvcmpeqsp 2, 0, 36
-; VSX-NEXT: xxsel 1, 35, 34, 1
; VSX-NEXT: xxsel 34, 0, 1, 2
; VSX-NEXT: blr
;
@@ -311,8 +331,11 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
; AIX-NEXT: lxvw4x 2, 0, 3
; AIX-NEXT: xxsel 0, 0, 2, 1
; AIX-NEXT: xxlnor 1, 37, 37
+; AIX-NEXT: xxsel 1, 0, 34, 1
+; AIX-NEXT: vcmpequw 2, 3, 4
+; AIX-NEXT: xxlnor 2, 34, 34
+; AIX-NEXT: xxsel 1, 1, 35, 2
; AIX-NEXT: xvcmpeqsp 2, 0, 36
-; AIX-NEXT: xxsel 1, 35, 34, 1
; AIX-NEXT: xxsel 34, 0, 1, 2
; AIX-NEXT: blr
entry:
@@ -336,7 +359,9 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
; NOVSX-NEXT: vsel 4, 4, 0, 5
; NOVSX-NEXT: vxor 5, 5, 5
; NOVSX-NEXT: vcmpequw 0, 2, 5
-; NOVSX-NEXT: vsel 2, 3, 2, 0
+; NOVSX-NEXT: vsel 2, 4, 2, 0
+; NOVSX-NEXT: vcmpequw 0, 3, 5
+; NOVSX-NEXT: vsel 2, 2, 3, 0
; NOVSX-NEXT: vcmpeqfp 3, 4, 5
; NOVSX-NEXT: vsel 2, 4, 2, 3
; NOVSX-NEXT: blr
@@ -355,8 +380,10 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
; VSX-NEXT: xxlor 1, 2, 1
; VSX-NEXT: lxvd2x 2, 0, 3
; VSX-NEXT: xxsel 0, 0, 2, 1
-; VSX-NEXT: xxsel 1, 35, 34, 37
; VSX-NEXT: xvcmpeqsp 2, 0, 36
+; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: vcmpequw 2, 3, 4
+; VSX-NEXT: xxsel 1, 1, 35, 34
; VSX-NEXT: xxsel 34, 0, 1, 2
; VSX-NEXT: blr
;
@@ -373,8 +400,10 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
; AIX-NEXT: xxlor 1, 2, 1
; AIX-NEXT: lxvw4x 2, 0, 3
; AIX-NEXT: xxsel 0, 0, 2, 1
-; AIX-NEXT: xxsel 1, 35, 34, 37
; AIX-NEXT: xvcmpeqsp 2, 0, 36
+; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: vcmpequw 2, 3, 4
+; AIX-NEXT: xxsel 1, 1, 35, 34
; AIX-NEXT: xxsel 34, 0, 1, 2
; AIX-NEXT: blr
entry:
@@ -385,63 +414,75 @@ entry:
define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
; NOVSX-LABEL: v2f64_minimum:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: stfd 1, -8(1)
+; NOVSX-NEXT: fcmpu 0, 1, 3
; NOVSX-NEXT: fmr 0, 1
-; NOVSX-NEXT: fmr 1, 3
-; NOVSX-NEXT: stfd 2, -16(1)
-; NOVSX-NEXT: ld 3, -8(1)
-; NOVSX-NEXT: cmpdi 3, 0
-; NOVSX-NEXT: bc 12, 2, .LBB6_2
+; NOVSX-NEXT: stfd 1, -16(1)
+; NOVSX-NEXT: stfd 3, -8(1)
+; NOVSX-NEXT: stfd 2, -32(1)
+; NOVSX-NEXT: stfd 4, -24(1)
+; NOVSX-NEXT: bc 12, 0, .LBB6_2
; NOVSX-NEXT: # %bb.1: # %entry
-; NOVSX-NEXT: fmr 1, 0
+; NOVSX-NEXT: fmr 1, 3
; NOVSX-NEXT: .LBB6_2: # %entry
-; NOVSX-NEXT: fcmpu 0, 0, 3
-; NOVSX-NEXT: bc 12, 0, .LBB6_4
+; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha
+; NOVSX-NEXT: lfs 5, .LCPI6_0@toc@l(3)
+; NOVSX-NEXT: fmr 6, 5
+; NOVSX-NEXT: bc 12, 3, .LBB6_4
; NOVSX-NEXT: # %bb.3: # %entry
-; NOVSX-NEXT: fmr 0, 3
+; NOVSX-NEXT: fmr 6, 1
; NOVSX-NEXT: .LBB6_4: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha
-; NOVSX-NEXT: lfs 3, .LCPI6_0@toc@l(3)
-; NOVSX-NEXT: fmr 6, 3
-; NOVSX-NEXT: bc 12, 3, .LBB6_6
+; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: fmr 1, 6
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB6_6
; NOVSX-NEXT: # %bb.5: # %entry
-; NOVSX-NEXT: fmr 6, 0
+; NOVSX-NEXT: fmr 1, 0
; NOVSX-NEXT: .LBB6_6: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha
-; NOVSX-NEXT: lfs 5, .LCPI6_1@toc@l(3)
-; NOVSX-NEXT: fcmpu 0, 6, 5
+; NOVSX-NEXT: ld 3, -8(1)
+; NOVSX-NEXT: cmpdi 3, 0
; NOVSX-NEXT: bc 12, 2, .LBB6_8
; NOVSX-NEXT: # %bb.7: # %entry
-; NOVSX-NEXT: fmr 1, 6
+; NOVSX-NEXT: fmr 1, 3
; NOVSX-NEXT: .LBB6_8: # %entry
-; NOVSX-NEXT: ld 3, -16(1)
-; NOVSX-NEXT: fmr 0, 4
-; NOVSX-NEXT: cmpdi 3, 0
-; NOVSX-NEXT: bc 4, 2, .LBB6_13
+; NOVSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha
+; NOVSX-NEXT: lfs 3, .LCPI6_1@toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 6, 3
+; NOVSX-NEXT: bc 12, 2, .LBB6_10
; NOVSX-NEXT: # %bb.9: # %entry
-; NOVSX-NEXT: fcmpu 0, 2, 4
-; NOVSX-NEXT: bc 4, 0, .LBB6_14
+; NOVSX-NEXT: fmr 1, 6
; NOVSX-NEXT: .LBB6_10: # %entry
-; NOVSX-NEXT: bc 4, 3, .LBB6_15
-; NOVSX-NEXT: .LBB6_11: # %entry
-; NOVSX-NEXT: fcmpu 0, 3, 5
-; NOVSX-NEXT: bc 4, 2, .LBB6_16
-; NOVSX-NEXT: .LBB6_12: # %entry
-; NOVSX-NEXT: fmr 2, 0
-; NOVSX-NEXT: blr
-; NOVSX-NEXT: .LBB6_13: # %entry
-; NOVSX-NEXT: fmr 0, 2
; NOVSX-NEXT: fcmpu 0, 2, 4
-; NOVSX-NEXT: bc 12, 0, .LBB6_10
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: bc 12, 0, .LBB6_12
+; NOVSX-NEXT: # %bb.11: # %entry
+; NOVSX-NEXT: fmr 0, 4
+; NOVSX-NEXT: .LBB6_12: # %entry
+; NOVSX-NEXT: bc 12, 3, .LBB6_14
+; NOVSX-NEXT: # %bb.13: # %entry
+; NOVSX-NEXT: fmr 5, 0
; NOVSX-NEXT: .LBB6_14: # %entry
-; NOVSX-NEXT: fmr 2, 4
-; NOVSX-NEXT: bc 12, 3, .LBB6_11
-; NOVSX-NEXT: .LBB6_15: # %entry
-; NOVSX-NEXT: fmr 3, 2
-; NOVSX-NEXT: fcmpu 0, 3, 5
-; NOVSX-NEXT: bc 12, 2, .LBB6_12
+; NOVSX-NEXT: ld 3, -32(1)
+; NOVSX-NEXT: fmr 0, 5
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB6_16
+; NOVSX-NEXT: # %bb.15: # %entry
+; NOVSX-NEXT: fmr 0, 2
; NOVSX-NEXT: .LBB6_16: # %entry
-; NOVSX-NEXT: fmr 0, 3
+; NOVSX-NEXT: ld 3, -24(1)
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 4, 2, .LBB6_19
+; NOVSX-NEXT: # %bb.17: # %entry
+; NOVSX-NEXT: fcmpu 0, 5, 3
+; NOVSX-NEXT: bc 4, 2, .LBB6_20
+; NOVSX-NEXT: .LBB6_18: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: blr
+; NOVSX-NEXT: .LBB6_19: # %entry
+; NOVSX-NEXT: fmr 0, 4
+; NOVSX-NEXT: fcmpu 0, 5, 3
+; NOVSX-NEXT: bc 12, 2, .LBB6_18
+; NOVSX-NEXT: .LBB6_20: # %entry
+; NOVSX-NEXT: fmr 0, 5
; NOVSX-NEXT: fmr 2, 0
; NOVSX-NEXT: blr
;
@@ -460,7 +501,10 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
; VSX-NEXT: vcmpequd 5, 2, 4
; VSX-NEXT: xxlnor 37, 37, 37
; VSX-NEXT: xxsel 0, 0, 2, 1
-; VSX-NEXT: xxsel 1, 35, 34, 37
+; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: vcmpequd 2, 3, 4
+; VSX-NEXT: xxlnor 34, 34, 34
+; VSX-NEXT: xxsel 1, 1, 35, 34
; VSX-NEXT: xvcmpeqdp 34, 0, 36
; VSX-NEXT: xxsel 34, 0, 1, 34
; VSX-NEXT: blr
@@ -479,7 +523,10 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
; AIX-NEXT: vcmpequd 5, 2, 4
; AIX-NEXT: xxlnor 37, 37, 37
; AIX-NEXT: xxsel 0, 0, 2, 1
-; AIX-NEXT: xxsel 1, 35, 34, 37
+; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: vcmpequd 2, 3, 4
+; AIX-NEXT: xxlnor 34, 34, 34
+; AIX-NEXT: xxsel 1, 1, 35, 34
; AIX-NEXT: xvcmpeqdp 34, 0, 36
; AIX-NEXT: xxsel 34, 0, 1, 34
; AIX-NEXT: blr
@@ -491,63 +538,71 @@ entry:
define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
; NOVSX-LABEL: v2f64_maximum:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: stfd 1, -8(1)
-; NOVSX-NEXT: fmr 0, 1
-; NOVSX-NEXT: stfd 2, -16(1)
-; NOVSX-NEXT: ld 3, -8(1)
-; NOVSX-NEXT: cmpdi 3, 0
-; NOVSX-NEXT: bc 12, 2, .LBB7_2
+; NOVSX-NEXT: fcmpu 0, 1, 3
+; NOVSX-NEXT: fmr 6, 1
+; NOVSX-NEXT: stfd 1, -16(1)
+; NOVSX-NEXT: stfd 3, -8(1)
+; NOVSX-NEXT: stfd 2, -32(1)
+; NOVSX-NEXT: stfd 4, -24(1)
+; NOVSX-NEXT: bc 12, 1, .LBB7_2
; NOVSX-NEXT: # %bb.1: # %entry
-; NOVSX-NEXT: fmr 1, 3
+; NOVSX-NEXT: fmr 6, 3
; NOVSX-NEXT: .LBB7_2: # %entry
-; NOVSX-NEXT: fcmpu 0, 0, 3
-; NOVSX-NEXT: bc 12, 1, .LBB7_4
+; NOVSX-NEXT: addis 3, 2, .LCPI7_0 at toc@ha
+; NOVSX-NEXT: lfs 0, .LCPI7_0 at toc@l(3)
+; NOVSX-NEXT: fmr 5, 0
+; NOVSX-NEXT: bc 12, 3, .LBB7_4
; NOVSX-NEXT: # %bb.3: # %entry
-; NOVSX-NEXT: fmr 0, 3
+; NOVSX-NEXT: fmr 5, 6
; NOVSX-NEXT: .LBB7_4: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI7_0 at toc@ha
-; NOVSX-NEXT: lfs 3, .LCPI7_0 at toc@l(3)
-; NOVSX-NEXT: fmr 6, 3
-; NOVSX-NEXT: bc 12, 3, .LBB7_6
+; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB7_6
; NOVSX-NEXT: # %bb.5: # %entry
-; NOVSX-NEXT: fmr 6, 0
+; NOVSX-NEXT: fmr 1, 5
; NOVSX-NEXT: .LBB7_6: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI7_1 at toc@ha
-; NOVSX-NEXT: lfs 5, .LCPI7_1 at toc@l(3)
-; NOVSX-NEXT: fcmpu 0, 6, 5
+; NOVSX-NEXT: ld 3, -8(1)
+; NOVSX-NEXT: cmpdi 3, 0
; NOVSX-NEXT: bc 12, 2, .LBB7_8
; NOVSX-NEXT: # %bb.7: # %entry
-; NOVSX-NEXT: fmr 1, 6
+; NOVSX-NEXT: fmr 3, 1
; NOVSX-NEXT: .LBB7_8: # %entry
-; NOVSX-NEXT: ld 3, -16(1)
-; NOVSX-NEXT: fmr 0, 2
-; NOVSX-NEXT: cmpdi 3, 0
-; NOVSX-NEXT: bc 4, 2, .LBB7_13
+; NOVSX-NEXT: addis 3, 2, .LCPI7_1 at toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI7_1 at toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 5, 1
+; NOVSX-NEXT: bc 12, 2, .LBB7_10
; NOVSX-NEXT: # %bb.9: # %entry
-; NOVSX-NEXT: fcmpu 0, 2, 4
-; NOVSX-NEXT: bc 4, 1, .LBB7_14
+; NOVSX-NEXT: fmr 3, 5
; NOVSX-NEXT: .LBB7_10: # %entry
-; NOVSX-NEXT: bc 4, 3, .LBB7_15
-; NOVSX-NEXT: .LBB7_11: # %entry
-; NOVSX-NEXT: fcmpu 0, 3, 5
-; NOVSX-NEXT: bc 4, 2, .LBB7_16
-; NOVSX-NEXT: .LBB7_12: # %entry
-; NOVSX-NEXT: fmr 2, 0
-; NOVSX-NEXT: blr
-; NOVSX-NEXT: .LBB7_13: # %entry
-; NOVSX-NEXT: fmr 0, 4
; NOVSX-NEXT: fcmpu 0, 2, 4
-; NOVSX-NEXT: bc 12, 1, .LBB7_10
+; NOVSX-NEXT: fmr 5, 2
+; NOVSX-NEXT: bc 12, 1, .LBB7_12
+; NOVSX-NEXT: # %bb.11: # %entry
+; NOVSX-NEXT: fmr 5, 4
+; NOVSX-NEXT: .LBB7_12: # %entry
+; NOVSX-NEXT: bc 12, 3, .LBB7_14
+; NOVSX-NEXT: # %bb.13: # %entry
+; NOVSX-NEXT: fmr 0, 5
; NOVSX-NEXT: .LBB7_14: # %entry
-; NOVSX-NEXT: fmr 2, 4
-; NOVSX-NEXT: bc 12, 3, .LBB7_11
-; NOVSX-NEXT: .LBB7_15: # %entry
-; NOVSX-NEXT: fmr 3, 2
-; NOVSX-NEXT: fcmpu 0, 3, 5
-; NOVSX-NEXT: bc 12, 2, .LBB7_12
-; NOVSX-NEXT: .LBB7_16: # %entry
-; NOVSX-NEXT: fmr 0, 3
+; NOVSX-NEXT: ld 3, -32(1)
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB7_16
+; NOVSX-NEXT: # %bb.15: # %entry
; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB7_16: # %entry
+; NOVSX-NEXT: ld 3, -24(1)
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB7_18
+; NOVSX-NEXT: # %bb.17: # %entry
+; NOVSX-NEXT: fmr 4, 2
+; NOVSX-NEXT: .LBB7_18: # %entry
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB7_20
+; NOVSX-NEXT: # %bb.19: # %entry
+; NOVSX-NEXT: fmr 4, 0
+; NOVSX-NEXT: .LBB7_20: # %entry
+; NOVSX-NEXT: fmr 1, 3
+; NOVSX-NEXT: fmr 2, 4
; NOVSX-NEXT: blr
;
; VSX-LABEL: v2f64_maximum:
@@ -564,7 +619,9 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
; VSX-NEXT: xxlxor 36, 36, 36
; VSX-NEXT: vcmpequd 5, 2, 4
; VSX-NEXT: xxsel 0, 0, 2, 1
-; VSX-NEXT: xxsel 1, 35, 34, 37
+; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: vcmpequd 2, 3, 4
+; VSX-NEXT: xxsel 1, 1, 35, 34
; VSX-NEXT: xvcmpeqdp 34, 0, 36
; VSX-NEXT: xxsel 34, 0, 1, 34
; VSX-NEXT: blr
@@ -582,7 +639,9 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
; AIX-NEXT: xxlxor 36, 36, 36
; AIX-NEXT: vcmpequd 5, 2, 4
; AIX-NEXT: xxsel 0, 0, 2, 1
-; AIX-NEXT: xxsel 1, 35, 34, 37
+; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: vcmpequd 2, 3, 4
+; AIX-NEXT: xxsel 1, 1, 35, 34
; AIX-NEXT: xvcmpeqdp 34, 0, 36
; AIX-NEXT: xxsel 34, 0, 1, 34
; AIX-NEXT: blr
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll
index fd76bd56bce43..d3f85ce51edea 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll
@@ -15,52 +15,63 @@ define half @maximum_half(half %x, half %y) nounwind {
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fsts {{[0-9]+}}(%esp)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fnstsw %ax
+; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
+; CHECK-NEXT: sahf
; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: je .LBB0_2
+; CHECK-NEXT: ja .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fld %st(1)
; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucom %st(2)
-; CHECK-NEXT: fnstsw %ax
-; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
-; CHECK-NEXT: sahf
-; CHECK-NEXT: ja .LBB0_4
+; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NEXT: jp .LBB0_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: fstp %st(2)
-; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
-; CHECK-NEXT: jp .LBB0_6
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB0_6
; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: fld %st(0)
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: .LBB0_6:
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB0_8
+; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: fstp %st(2)
; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fxch %st(2)
-; CHECK-NEXT: fucom %st(2)
-; CHECK-NEXT: fstp %st(2)
+; CHECK-NEXT: .LBB0_8:
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
-; CHECK-NEXT: jne .LBB0_7
-; CHECK-NEXT: # %bb.8:
-; CHECK-NEXT: jp .LBB0_11
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: fstp %st(1)
-; CHECK-NEXT: jmp .LBB0_10
-; CHECK-NEXT: .LBB0_7:
+; CHECK-NEXT: jne .LBB0_9
+; CHECK-NEXT: # %bb.10:
+; CHECK-NEXT: jp .LBB0_13
+; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: .LBB0_10:
+; CHECK-NEXT: jmp .LBB0_12
+; CHECK-NEXT: .LBB0_9:
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: .LBB0_12:
; CHECK-NEXT: fldz
-; CHECK-NEXT: .LBB0_11:
-; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: .LBB0_13:
+; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: addl $24, %esp
@@ -73,57 +84,68 @@ define half @maximum_half(half %x, half %y) nounwind {
define float @maximum_float(float %x, float %y) nounwind {
; CHECK-LABEL: maximum_float:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fsts (%esp)
-; CHECK-NEXT: cmpl $0, (%esp)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fsts {{[0-9]+}}(%esp)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fnstsw %ax
+; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
+; CHECK-NEXT: sahf
; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: je .LBB1_2
+; CHECK-NEXT: ja .LBB1_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fld %st(1)
; CHECK-NEXT: .LBB1_2:
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucom %st(2)
-; CHECK-NEXT: fnstsw %ax
-; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
-; CHECK-NEXT: sahf
-; CHECK-NEXT: ja .LBB1_4
+; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NEXT: jp .LBB1_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: .LBB1_4:
-; CHECK-NEXT: fstp %st(2)
-; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
-; CHECK-NEXT: jp .LBB1_6
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: cmpl $0, (%esp)
+; CHECK-NEXT: je .LBB1_6
; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: fld %st(0)
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: .LBB1_6:
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB1_8
+; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: fstp %st(2)
; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fxch %st(2)
-; CHECK-NEXT: fucom %st(2)
-; CHECK-NEXT: fstp %st(2)
+; CHECK-NEXT: .LBB1_8:
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
-; CHECK-NEXT: jne .LBB1_7
-; CHECK-NEXT: # %bb.8:
-; CHECK-NEXT: jp .LBB1_11
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: fstp %st(1)
-; CHECK-NEXT: jmp .LBB1_10
-; CHECK-NEXT: .LBB1_7:
+; CHECK-NEXT: jne .LBB1_9
+; CHECK-NEXT: # %bb.10:
+; CHECK-NEXT: jp .LBB1_13
+; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: .LBB1_10:
+; CHECK-NEXT: jmp .LBB1_12
+; CHECK-NEXT: .LBB1_9:
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: .LBB1_12:
; CHECK-NEXT: fldz
-; CHECK-NEXT: .LBB1_11:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: popl %eax
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: .LBB1_13:
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: retl
%res = call float @llvm.maximum.f32(float %x, float %y)
ret float %res
@@ -132,57 +154,68 @@ define float @maximum_float(float %x, float %y) nounwind {
define double @maximum_double(double %x, double %y) nounwind {
; CHECK-LABEL: maximum_double:
; CHECK: # %bb.0:
-; CHECK-NEXT: subl $12, %esp
+; CHECK-NEXT: subl $20, %esp
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: fstl {{[0-9]+}}(%esp)
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstl (%esp)
-; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fnstsw %ax
+; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
+; CHECK-NEXT: sahf
; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: je .LBB2_2
+; CHECK-NEXT: ja .LBB2_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fld %st(1)
; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucom %st(2)
-; CHECK-NEXT: fnstsw %ax
-; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
-; CHECK-NEXT: sahf
-; CHECK-NEXT: ja .LBB2_4
+; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NEXT: jp .LBB2_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT: fstp %st(2)
-; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
-; CHECK-NEXT: jp .LBB2_6
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB2_6
; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: fld %st(0)
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: .LBB2_6:
+; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB2_8
+; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: fstp %st(2)
; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fxch %st(2)
-; CHECK-NEXT: fucom %st(2)
-; CHECK-NEXT: fstp %st(2)
+; CHECK-NEXT: .LBB2_8:
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
-; CHECK-NEXT: jne .LBB2_7
-; CHECK-NEXT: # %bb.8:
-; CHECK-NEXT: jp .LBB2_11
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: fstp %st(1)
-; CHECK-NEXT: jmp .LBB2_10
-; CHECK-NEXT: .LBB2_7:
+; CHECK-NEXT: jne .LBB2_9
+; CHECK-NEXT: # %bb.10:
+; CHECK-NEXT: jp .LBB2_13
+; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: .LBB2_10:
+; CHECK-NEXT: jmp .LBB2_12
+; CHECK-NEXT: .LBB2_9:
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: .LBB2_12:
; CHECK-NEXT: fldz
-; CHECK-NEXT: .LBB2_11:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: addl $12, %esp
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: .LBB2_13:
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: addl $20, %esp
; CHECK-NEXT: retl
%res = call double @llvm.maximum.f64(double %x, double %y)
ret double %res
@@ -239,57 +272,63 @@ define half @minimum_half(half %x, half %y) nounwind {
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: jne .LBB4_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: fld %st(1)
-; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fsts {{[0-9]+}}(%esp)
; CHECK-NEXT: fucom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
; CHECK-NEXT: fld %st(1)
-; CHECK-NEXT: ja .LBB4_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: ja .LBB4_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: fxch %st(2)
-; CHECK-NEXT: fucompp
+; CHECK-NEXT: fucom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
-; CHECK-NEXT: jp .LBB4_6
-; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: jp .LBB4_4
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fxch %st(3)
+; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: fstp %st(3)
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: jne .LBB4_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fld %st(1)
; CHECK-NEXT: .LBB4_6:
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: jne .LBB4_8
+; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucom %st(1)
-; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: .LBB4_8:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fucom %st(2)
+; CHECK-NEXT: fstp %st(2)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
-; CHECK-NEXT: jne .LBB4_7
-; CHECK-NEXT: # %bb.8:
-; CHECK-NEXT: jp .LBB4_11
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: jmp .LBB4_10
-; CHECK-NEXT: .LBB4_7:
+; CHECK-NEXT: jne .LBB4_9
+; CHECK-NEXT: # %bb.10:
+; CHECK-NEXT: jp .LBB4_13
+; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: fstp %st(1)
-; CHECK-NEXT: .LBB4_10:
+; CHECK-NEXT: jmp .LBB4_12
+; CHECK-NEXT: .LBB4_9:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: .LBB4_12:
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: .LBB4_11:
-; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: .LBB4_13:
+; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: addl $24, %esp
@@ -302,62 +341,68 @@ define half @minimum_half(half %x, half %y) nounwind {
define float @minimum_float(float %x, float %y) nounwind {
; CHECK-LABEL: minimum_float:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fsts (%esp)
-; CHECK-NEXT: cmpl $0, (%esp)
-; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: jne .LBB5_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: fld %st(1)
-; CHECK-NEXT: .LBB5_2:
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fsts {{[0-9]+}}(%esp)
; CHECK-NEXT: fucom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
; CHECK-NEXT: fld %st(1)
-; CHECK-NEXT: ja .LBB5_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: ja .LBB5_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: .LBB5_4:
+; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: fxch %st(2)
-; CHECK-NEXT: fucompp
+; CHECK-NEXT: fucom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
-; CHECK-NEXT: jp .LBB5_6
-; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: jp .LBB5_4
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fxch %st(3)
+; CHECK-NEXT: .LBB5_4:
+; CHECK-NEXT: fstp %st(3)
+; CHECK-NEXT: cmpl $0, (%esp)
+; CHECK-NEXT: jne .LBB5_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fld %st(1)
; CHECK-NEXT: .LBB5_6:
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: jne .LBB5_8
+; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucom %st(1)
-; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: .LBB5_8:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fucom %st(2)
+; CHECK-NEXT: fstp %st(2)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
-; CHECK-NEXT: jne .LBB5_7
-; CHECK-NEXT: # %bb.8:
-; CHECK-NEXT: jp .LBB5_11
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: jmp .LBB5_10
-; CHECK-NEXT: .LBB5_7:
+; CHECK-NEXT: jne .LBB5_9
+; CHECK-NEXT: # %bb.10:
+; CHECK-NEXT: jp .LBB5_13
+; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: fstp %st(1)
-; CHECK-NEXT: .LBB5_10:
+; CHECK-NEXT: jmp .LBB5_12
+; CHECK-NEXT: .LBB5_9:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: .LBB5_12:
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: .LBB5_11:
-; CHECK-NEXT: fstp %st(1)
-; CHECK-NEXT: popl %eax
+; CHECK-NEXT: .LBB5_13:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: retl
%res = call float @llvm.minimum.f32(float %x, float %y)
ret float %res
@@ -366,62 +411,68 @@ define float @minimum_float(float %x, float %y) nounwind {
define double @minimum_double(double %x, double %y) nounwind {
; CHECK-LABEL: minimum_double:
; CHECK: # %bb.0:
-; CHECK-NEXT: subl $12, %esp
+; CHECK-NEXT: subl $20, %esp
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: fstl {{[0-9]+}}(%esp)
+; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstl (%esp)
-; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: jne .LBB6_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: fld %st(1)
-; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: fxch %st(2)
; CHECK-NEXT: fucom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
; CHECK-NEXT: fld %st(1)
-; CHECK-NEXT: ja .LBB6_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: ja .LBB6_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: .LBB6_4:
+; CHECK-NEXT: .LBB6_2:
; CHECK-NEXT: fxch %st(2)
-; CHECK-NEXT: fucompp
+; CHECK-NEXT: fucom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
-; CHECK-NEXT: jp .LBB6_6
-; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: jp .LBB6_4
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fxch %st(3)
+; CHECK-NEXT: .LBB6_4:
+; CHECK-NEXT: fstp %st(3)
+; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: jne .LBB6_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fld %st(1)
; CHECK-NEXT: .LBB6_6:
+; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: jne .LBB6_8
+; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucom %st(1)
-; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: .LBB6_8:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: fldz
+; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fucom %st(2)
+; CHECK-NEXT: fstp %st(2)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
-; CHECK-NEXT: jne .LBB6_7
-; CHECK-NEXT: # %bb.8:
-; CHECK-NEXT: jp .LBB6_11
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: jmp .LBB6_10
-; CHECK-NEXT: .LBB6_7:
+; CHECK-NEXT: jne .LBB6_9
+; CHECK-NEXT: # %bb.10:
+; CHECK-NEXT: jp .LBB6_13
+; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: fstp %st(1)
-; CHECK-NEXT: .LBB6_10:
+; CHECK-NEXT: jmp .LBB6_12
+; CHECK-NEXT: .LBB6_9:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: .LBB6_12:
; CHECK-NEXT: fldz
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: .LBB6_11:
-; CHECK-NEXT: fstp %st(1)
-; CHECK-NEXT: addl $12, %esp
+; CHECK-NEXT: .LBB6_13:
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: addl $20, %esp
; CHECK-NEXT: retl
%res = call double @llvm.minimum.f64(double %x, double %y)
ret double %res
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index ccebd3c5db062..80e3a017a44e3 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -1972,8 +1972,10 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
; AVX512-NEXT: cmovpl %eax, %ecx
; AVX512-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm3
; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm4
-; AVX512-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm5
+; AVX512-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm4
+; AVX512-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpblendvb %xmm3, %xmm0, %xmm2, %xmm0
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12
@@ -2652,135 +2654,150 @@ define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind {
; SSE2-LABEL: maximum_fp128:
; SSE2: # %bb.0:
; SSE2-NEXT: subq $88, %rsp
-; SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE2-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: je .LBB35_2
-; SSE2-NEXT: # %bb.1:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: .LBB35_2:
-; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
; SSE2-NEXT: callq __gttf2 at PLT
-; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE2-NEXT: testl %eax, %eax
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: jg .LBB35_4
-; SSE2-NEXT: # %bb.3:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: .LBB35_4:
-; SSE2-NEXT: movaps %xmm2, (%rsp) # 16-byte Spill
+; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE2-NEXT: jg .LBB35_2
+; SSE2-NEXT: # %bb.1:
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: .LBB35_2:
+; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; SSE2-NEXT: callq __unordtf2 at PLT
; SSE2-NEXT: testl %eax, %eax
-; SSE2-NEXT: jne .LBB35_5
-; SSE2-NEXT: # %bb.6:
-; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; SSE2-NEXT: jmp .LBB35_7
-; SSE2-NEXT: .LBB35_5:
+; SSE2-NEXT: jne .LBB35_3
+; SSE2-NEXT: # %bb.4:
+; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE2-NEXT: jmp .LBB35_5
+; SSE2-NEXT: .LBB35_3:
; SSE2-NEXT: movaps {{.*#+}} xmm0 = [NaN]
+; SSE2-NEXT: .LBB35_5:
+; SSE2-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
+; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; SSE2-NEXT: je .LBB35_7
+; SSE2-NEXT: # %bb.6:
+; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE2-NEXT: .LBB35_7:
+; SSE2-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
+; SSE2-NEXT: je .LBB35_9
+; SSE2-NEXT: # %bb.8:
+; SSE2-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; SSE2-NEXT: .LBB35_9:
+; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE2-NEXT: callq __eqtf2 at PLT
+; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: je .LBB35_11
+; SSE2-NEXT: # %bb.10:
; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE2-NEXT: je .LBB35_9
-; SSE2-NEXT: # %bb.8:
-; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; SSE2-NEXT: .LBB35_9:
+; SSE2-NEXT: .LBB35_11:
; SSE2-NEXT: addq $88, %rsp
; SSE2-NEXT: retq
;
; AVX-LABEL: maximum_fp128:
; AVX: # %bb.0:
; AVX-NEXT: subq $88, %rsp
-; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
-; AVX-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovaps %xmm0, %xmm2
-; AVX-NEXT: je .LBB35_2
-; AVX-NEXT: # %bb.1:
-; AVX-NEXT: vmovaps %xmm1, %xmm2
-; AVX-NEXT: .LBB35_2:
-; AVX-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp)
; AVX-NEXT: callq __gttf2 at PLT
-; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX-NEXT: testl %eax, %eax
-; AVX-NEXT: vmovaps %xmm0, %xmm2
-; AVX-NEXT: jg .LBB35_4
-; AVX-NEXT: # %bb.3:
-; AVX-NEXT: vmovaps %xmm1, %xmm2
-; AVX-NEXT: .LBB35_4:
-; AVX-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX-NEXT: jg .LBB35_2
+; AVX-NEXT: # %bb.1:
+; AVX-NEXT: vmovaps %xmm1, %xmm0
+; AVX-NEXT: .LBB35_2:
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX-NEXT: callq __unordtf2 at PLT
; AVX-NEXT: testl %eax, %eax
-; AVX-NEXT: jne .LBB35_5
-; AVX-NEXT: # %bb.6:
-; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX-NEXT: jmp .LBB35_7
-; AVX-NEXT: .LBB35_5:
+; AVX-NEXT: jne .LBB35_3
+; AVX-NEXT: # %bb.4:
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX-NEXT: jmp .LBB35_5
+; AVX-NEXT: .LBB35_3:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [NaN]
+; AVX-NEXT: .LBB35_5:
+; AVX-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX-NEXT: je .LBB35_7
+; AVX-NEXT: # %bb.6:
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: .LBB35_7:
+; AVX-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
+; AVX-NEXT: je .LBB35_9
+; AVX-NEXT: # %bb.8:
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: .LBB35_9:
+; AVX-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: callq __eqtf2 at PLT
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX-NEXT: testl %eax, %eax
+; AVX-NEXT: je .LBB35_11
+; AVX-NEXT: # %bb.10:
; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; AVX-NEXT: je .LBB35_9
-; AVX-NEXT: # %bb.8:
-; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX-NEXT: .LBB35_9:
+; AVX-NEXT: .LBB35_11:
; AVX-NEXT: addq $88, %rsp
; AVX-NEXT: retq
;
; AVX10_2-LABEL: maximum_fp128:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: subq $88, %rsp
-; AVX10_2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
-; AVX10_2-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
-; AVX10_2-NEXT: vmovaps %xmm0, %xmm2
-; AVX10_2-NEXT: je .LBB35_2
-; AVX10_2-NEXT: # %bb.1:
-; AVX10_2-NEXT: vmovaps %xmm1, %xmm2
-; AVX10_2-NEXT: .LBB35_2:
-; AVX10_2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX10_2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; AVX10_2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX10_2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX10_2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX10_2-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp)
; AVX10_2-NEXT: callq __gttf2 at PLT
-; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX10_2-NEXT: testl %eax, %eax
-; AVX10_2-NEXT: vmovaps %xmm0, %xmm2
-; AVX10_2-NEXT: jg .LBB35_4
-; AVX10_2-NEXT: # %bb.3:
-; AVX10_2-NEXT: vmovaps %xmm1, %xmm2
-; AVX10_2-NEXT: .LBB35_4:
-; AVX10_2-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX10_2-NEXT: jg .LBB35_2
+; AVX10_2-NEXT: # %bb.1:
+; AVX10_2-NEXT: vmovaps %xmm1, %xmm0
+; AVX10_2-NEXT: .LBB35_2:
+; AVX10_2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX10_2-NEXT: callq __unordtf2 at PLT
; AVX10_2-NEXT: testl %eax, %eax
-; AVX10_2-NEXT: jne .LBB35_5
-; AVX10_2-NEXT: # %bb.6:
-; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX10_2-NEXT: jmp .LBB35_7
-; AVX10_2-NEXT: .LBB35_5:
+; AVX10_2-NEXT: jne .LBB35_3
+; AVX10_2-NEXT: # %bb.4:
+; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX10_2-NEXT: jmp .LBB35_5
+; AVX10_2-NEXT: .LBB35_3:
; AVX10_2-NEXT: vmovaps {{.*#+}} xmm0 = [NaN]
+; AVX10_2-NEXT: .LBB35_5:
+; AVX10_2-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
+; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX10_2-NEXT: je .LBB35_7
+; AVX10_2-NEXT: # %bb.6:
+; AVX10_2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX10_2-NEXT: .LBB35_7:
+; AVX10_2-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
+; AVX10_2-NEXT: je .LBB35_9
+; AVX10_2-NEXT: # %bb.8:
+; AVX10_2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX10_2-NEXT: .LBB35_9:
+; AVX10_2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX10_2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX10_2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX10_2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX10_2-NEXT: callq __eqtf2 at PLT
+; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX10_2-NEXT: testl %eax, %eax
+; AVX10_2-NEXT: je .LBB35_11
+; AVX10_2-NEXT: # %bb.10:
; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; AVX10_2-NEXT: je .LBB35_9
-; AVX10_2-NEXT: # %bb.8:
-; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX10_2-NEXT: .LBB35_9:
+; AVX10_2-NEXT: .LBB35_11:
; AVX10_2-NEXT: addq $88, %rsp
; AVX10_2-NEXT: retq
;
More information about the llvm-commits
mailing list