[llvm] 2ec43e4 - [LegalizeDAG] Implement promotion rules for SELECT_CC

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 5 09:23:02 PST 2021


Author: LemonBoy
Date: 2021-03-05T18:22:55+01:00
New Revision: 2ec43e416734bdead6bfeeaf5ab54c479b6f444e

URL: https://github.com/llvm/llvm-project/commit/2ec43e416734bdead6bfeeaf5ab54c479b6f444e
DIFF: https://github.com/llvm/llvm-project/commit/2ec43e416734bdead6bfeeaf5ab54c479b6f444e.diff

LOG: [LegalizeDAG] Implement promotion rules for SELECT_CC

Implement the promotion rule for SELECT_CC nodes by upcasting all the parameters and downcasting the result.
The AArch64 target makes use of this rule and, since it was not implemented, in some cases the instruction selector would hit an assertion upon encountering the illegal node.

This patch requires D97840; the included test cases hit both problems.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D97859

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 8534fcd60e59..a2d5c528b59e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4518,11 +4518,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
       Node->getOpcode() == ISD::STRICT_FSETCC ||
       Node->getOpcode() == ISD::STRICT_FSETCCS)
     OVT = Node->getOperand(1).getSimpleValueType();
-  if (Node->getOpcode() == ISD::BR_CC)
+  if (Node->getOpcode() == ISD::BR_CC ||
+      Node->getOpcode() == ISD::SELECT_CC)
     OVT = Node->getOperand(2).getSimpleValueType();
   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
   SDLoc dl(Node);
-  SDValue Tmp1, Tmp2, Tmp3;
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4;
   switch (Node->getOpcode()) {
   case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
@@ -4714,6 +4715,45 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     Results.push_back(Tmp1);
     break;
   }
+
+  case ISD::SELECT_CC: {
+    SDValue Cond = Node->getOperand(4);
+    ISD::CondCode CCCode = cast<CondCodeSDNode>(Cond)->get();
+    // Type of the comparison operands.
+    MVT CVT = Node->getSimpleValueType(0);
+    assert(CVT == OVT && "not handled");
+
+    unsigned ExtOp = ISD::FP_EXTEND;
+    if (NVT.isInteger()) {
+      ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    }
+
+    // Promote the comparison operands, if needed.
+    if (TLI.isCondCodeLegal(CCCode, CVT)) {
+      Tmp1 = Node->getOperand(0);
+      Tmp2 = Node->getOperand(1);
+    } else {
+      Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+      Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+    }
+    // Cast the true/false operands.
+    Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+    Tmp4 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
+
+    Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, NVT, {Tmp1, Tmp2, Tmp3, Tmp4, Cond},
+                       Node->getFlags());
+
+    // Cast the result back to the original type.
+    if (ExtOp != ISD::FP_EXTEND)
+      Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1);
+    else
+      Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1,
+                         DAG.getIntPtrConstant(0, dl));
+
+    Results.push_back(Tmp1);
+    break;
+  }
+
   case ISD::SETCC:
   case ISD::STRICT_FSETCC:
   case ISD::STRICT_FSETCCS: {

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index d26db2aefee0..db70219f4376 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -8,6 +8,7 @@ declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
 declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
 
 declare half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
+declare half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a)
 declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
 declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
 declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
@@ -104,6 +105,221 @@ define half @test_v4f16_ninf(<4 x half> %a) nounwind {
   ret half %b
 }
 
+define half @test_v11f16(<11 x half> %a) nounwind {
+; CHECK-NOFP-LABEL: test_v11f16:
+; CHECK-NOFP:       // %bb.0:
+; CHECK-NOFP-NEXT:    ldr h18, [sp, #8]
+; CHECK-NOFP-NEXT:    ldr h17, [sp]
+; CHECK-NOFP-NEXT:    ldr h16, [sp, #16]
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s18, h18
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcmp s1, s18
+; CHECK-NOFP-NEXT:    fcvt s17, h17
+; CHECK-NOFP-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NOFP-NEXT:    fcsel s1, s1, s18, gt
+; CHECK-NOFP-NEXT:    fcmp s0, s17
+; CHECK-NOFP-NEXT:    ldr h18, [x8, :lo12:.LCPI6_0]
+; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, gt
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s16, h16
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s2, s2, s16, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    mov w8, #-8388608
+; CHECK-NOFP-NEXT:    fcvt s18, h18
+; CHECK-NOFP-NEXT:    fcvt h2, s2
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fmov s17, w8
+; CHECK-NOFP-NEXT:    fcmp s3, s18
+; CHECK-NOFP-NEXT:    fcvt s1, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s4, h4
+; CHECK-NOFP-NEXT:    fcvt h2, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s4, s18
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s2
+; CHECK-NOFP-NEXT:    fcvt s5, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s5, s18
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s4, s5, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcvt s6, h6
+; CHECK-NOFP-NEXT:    fcvt h4, s4
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s6, s18
+; CHECK-NOFP-NEXT:    fcvt s1, h4
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s5, s6, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s7, h7
+; CHECK-NOFP-NEXT:    fcvt h4, s5
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s7, s18
+; CHECK-NOFP-NEXT:    fcvt s4, h4
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s5, s7, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s4
+; CHECK-NOFP-NEXT:    fcvt h5, s5
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s1, h5
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    ret
+;
+; CHECK-FP-LABEL: test_v11f16:
+; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    movi v16.8h, #252, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    ld1 { v16.h }[0], [x8]
+; CHECK-FP-NEXT:    add x8, sp, #8 // =8
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
+; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-FP-NEXT:    ld1 { v16.h }[1], [x8]
+; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-FP-NEXT:    mov v0.h[3], v3.h[0]
+; CHECK-FP-NEXT:    add x8, sp, #16 // =16
+; CHECK-FP-NEXT:    mov v0.h[4], v4.h[0]
+; CHECK-FP-NEXT:    ld1 { v16.h }[2], [x8]
+; CHECK-FP-NEXT:    mov v0.h[5], v5.h[0]
+; CHECK-FP-NEXT:    mov v0.h[6], v6.h[0]
+; CHECK-FP-NEXT:    mov v0.h[7], v7.h[0]
+; CHECK-FP-NEXT:    fmaxnm v0.8h, v0.8h, v16.8h
+; CHECK-FP-NEXT:    fmaxnmv h0, v0.8h
+; CHECK-FP-NEXT:    ret
+  %b = call nnan half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a)
+  ret half %b
+}
+
+define half @test_v11f16_ninf(<11 x half> %a) nounwind {
+; CHECK-NOFP-LABEL: test_v11f16_ninf:
+; CHECK-NOFP:       // %bb.0:
+; CHECK-NOFP-NEXT:    ldr h18, [sp, #8]
+; CHECK-NOFP-NEXT:    ldr h17, [sp]
+; CHECK-NOFP-NEXT:    ldr h16, [sp, #16]
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s18, h18
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcmp s1, s18
+; CHECK-NOFP-NEXT:    fcvt s17, h17
+; CHECK-NOFP-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NOFP-NEXT:    fcsel s1, s1, s18, gt
+; CHECK-NOFP-NEXT:    fcmp s0, s17
+; CHECK-NOFP-NEXT:    ldr h18, [x8, :lo12:.LCPI7_0]
+; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, gt
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s16, h16
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    mov w8, #57344
+; CHECK-NOFP-NEXT:    fcsel s2, s2, s16, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    movk w8, #51071, lsl #16
+; CHECK-NOFP-NEXT:    fcvt s18, h18
+; CHECK-NOFP-NEXT:    fcvt h2, s2
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fmov s17, w8
+; CHECK-NOFP-NEXT:    fcmp s3, s18
+; CHECK-NOFP-NEXT:    fcvt s1, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s4, h4
+; CHECK-NOFP-NEXT:    fcvt h2, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s4, s18
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s2
+; CHECK-NOFP-NEXT:    fcvt s5, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s5, s18
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s4, s5, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcvt s6, h6
+; CHECK-NOFP-NEXT:    fcvt h4, s4
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s6, s18
+; CHECK-NOFP-NEXT:    fcvt s1, h4
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s5, s6, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s7, h7
+; CHECK-NOFP-NEXT:    fcvt h4, s5
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s7, s18
+; CHECK-NOFP-NEXT:    fcvt s4, h4
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s5, s7, s17, gt
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s4
+; CHECK-NOFP-NEXT:    fcvt h5, s5
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s1, h5
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    ret
+;
+; CHECK-FP-LABEL: test_v11f16_ninf:
+; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    mvni v16.8h, #4, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    ld1 { v16.h }[0], [x8]
+; CHECK-FP-NEXT:    add x8, sp, #8 // =8
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
+; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-FP-NEXT:    ld1 { v16.h }[1], [x8]
+; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-FP-NEXT:    mov v0.h[3], v3.h[0]
+; CHECK-FP-NEXT:    add x8, sp, #16 // =16
+; CHECK-FP-NEXT:    mov v0.h[4], v4.h[0]
+; CHECK-FP-NEXT:    ld1 { v16.h }[2], [x8]
+; CHECK-FP-NEXT:    mov v0.h[5], v5.h[0]
+; CHECK-FP-NEXT:    mov v0.h[6], v6.h[0]
+; CHECK-FP-NEXT:    mov v0.h[7], v7.h[0]
+; CHECK-FP-NEXT:    fmaxnm v0.8h, v0.8h, v16.8h
+; CHECK-FP-NEXT:    fmaxnmv h0, v0.8h
+; CHECK-FP-NEXT:    ret
+  %b = call nnan ninf half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a)
+  ret half %b
+}
+
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0:

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 52d6e9773ab2..4925f049f953 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -8,6 +8,7 @@ declare double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
 declare fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a)
 
 declare half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
+declare half @llvm.vector.reduce.fmin.v11f16(<11 x half> %a)
 declare float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
 declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
 declare float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
@@ -104,6 +105,221 @@ define half @test_v4f16_ninf(<4 x half> %a) nounwind {
   ret half %b
 }
 
+define half @test_v11f16(<11 x half> %a) nounwind {
+; CHECK-NOFP-LABEL: test_v11f16:
+; CHECK-NOFP:       // %bb.0:
+; CHECK-NOFP-NEXT:    ldr h18, [sp, #8]
+; CHECK-NOFP-NEXT:    ldr h17, [sp]
+; CHECK-NOFP-NEXT:    ldr h16, [sp, #16]
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s18, h18
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcmp s1, s18
+; CHECK-NOFP-NEXT:    fcvt s17, h17
+; CHECK-NOFP-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NOFP-NEXT:    fcsel s1, s1, s18, lt
+; CHECK-NOFP-NEXT:    fcmp s0, s17
+; CHECK-NOFP-NEXT:    ldr h18, [x8, :lo12:.LCPI6_0]
+; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, lt
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s16, h16
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s2, s2, s16, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    mov w8, #2139095040
+; CHECK-NOFP-NEXT:    fcvt s18, h18
+; CHECK-NOFP-NEXT:    fcvt h2, s2
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fmov s17, w8
+; CHECK-NOFP-NEXT:    fcmp s3, s18
+; CHECK-NOFP-NEXT:    fcvt s1, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s4, h4
+; CHECK-NOFP-NEXT:    fcvt h2, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s4, s18
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s2
+; CHECK-NOFP-NEXT:    fcvt s5, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s5, s18
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s4, s5, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcvt s6, h6
+; CHECK-NOFP-NEXT:    fcvt h4, s4
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s6, s18
+; CHECK-NOFP-NEXT:    fcvt s1, h4
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s5, s6, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s7, h7
+; CHECK-NOFP-NEXT:    fcvt h4, s5
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s7, s18
+; CHECK-NOFP-NEXT:    fcvt s4, h4
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s5, s7, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s4
+; CHECK-NOFP-NEXT:    fcvt h5, s5
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s1, h5
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    ret
+;
+; CHECK-FP-LABEL: test_v11f16:
+; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    movi v16.8h, #124, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    ld1 { v16.h }[0], [x8]
+; CHECK-FP-NEXT:    add x8, sp, #8 // =8
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
+; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-FP-NEXT:    ld1 { v16.h }[1], [x8]
+; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-FP-NEXT:    mov v0.h[3], v3.h[0]
+; CHECK-FP-NEXT:    add x8, sp, #16 // =16
+; CHECK-FP-NEXT:    mov v0.h[4], v4.h[0]
+; CHECK-FP-NEXT:    ld1 { v16.h }[2], [x8]
+; CHECK-FP-NEXT:    mov v0.h[5], v5.h[0]
+; CHECK-FP-NEXT:    mov v0.h[6], v6.h[0]
+; CHECK-FP-NEXT:    mov v0.h[7], v7.h[0]
+; CHECK-FP-NEXT:    fminnm v0.8h, v0.8h, v16.8h
+; CHECK-FP-NEXT:    fminnmv h0, v0.8h
+; CHECK-FP-NEXT:    ret
+  %b = call nnan half @llvm.vector.reduce.fmin.v11f16(<11 x half> %a)
+  ret half %b
+}
+
+define half @test_v11f16_ninf(<11 x half> %a) nounwind {
+; CHECK-NOFP-LABEL: test_v11f16_ninf:
+; CHECK-NOFP:       // %bb.0:
+; CHECK-NOFP-NEXT:    ldr h18, [sp, #8]
+; CHECK-NOFP-NEXT:    ldr h17, [sp]
+; CHECK-NOFP-NEXT:    ldr h16, [sp, #16]
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s18, h18
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcmp s1, s18
+; CHECK-NOFP-NEXT:    fcvt s17, h17
+; CHECK-NOFP-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NOFP-NEXT:    fcsel s1, s1, s18, lt
+; CHECK-NOFP-NEXT:    fcmp s0, s17
+; CHECK-NOFP-NEXT:    ldr h18, [x8, :lo12:.LCPI7_0]
+; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, lt
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s16, h16
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    mov w8, #57344
+; CHECK-NOFP-NEXT:    fcsel s2, s2, s16, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    movk w8, #18303, lsl #16
+; CHECK-NOFP-NEXT:    fcvt s18, h18
+; CHECK-NOFP-NEXT:    fcvt h2, s2
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fmov s17, w8
+; CHECK-NOFP-NEXT:    fcmp s3, s18
+; CHECK-NOFP-NEXT:    fcvt s1, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s4, h4
+; CHECK-NOFP-NEXT:    fcvt h2, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s4, s18
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s2
+; CHECK-NOFP-NEXT:    fcvt s5, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s5, s18
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s4, s5, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcvt s6, h6
+; CHECK-NOFP-NEXT:    fcvt h4, s4
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s6, s18
+; CHECK-NOFP-NEXT:    fcvt s1, h4
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s5, s6, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt s7, h7
+; CHECK-NOFP-NEXT:    fcvt h4, s5
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s7, s18
+; CHECK-NOFP-NEXT:    fcvt s4, h4
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcsel s5, s7, s17, lt
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s4
+; CHECK-NOFP-NEXT:    fcvt h5, s5
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s1, h5
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    ret
+;
+; CHECK-FP-LABEL: test_v11f16_ninf:
+; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    mvni v16.8h, #132, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    ld1 { v16.h }[0], [x8]
+; CHECK-FP-NEXT:    add x8, sp, #8 // =8
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
+; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-FP-NEXT:    ld1 { v16.h }[1], [x8]
+; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-FP-NEXT:    mov v0.h[3], v3.h[0]
+; CHECK-FP-NEXT:    add x8, sp, #16 // =16
+; CHECK-FP-NEXT:    mov v0.h[4], v4.h[0]
+; CHECK-FP-NEXT:    ld1 { v16.h }[2], [x8]
+; CHECK-FP-NEXT:    mov v0.h[5], v5.h[0]
+; CHECK-FP-NEXT:    mov v0.h[6], v6.h[0]
+; CHECK-FP-NEXT:    mov v0.h[7], v7.h[0]
+; CHECK-FP-NEXT:    fminnm v0.8h, v0.8h, v16.8h
+; CHECK-FP-NEXT:    fminnmv h0, v0.8h
+; CHECK-FP-NEXT:    ret
+  %b = call nnan ninf half @llvm.vector.reduce.fmin.v11f16(<11 x half> %a)
+  ret half %b
+}
+
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0:


        


More information about the llvm-commits mailing list