[llvm] 07ccf65 - [AArch64][GlobalISel] Enable vector support for G_SELECT->G_FMAXIMUM/MINIMUM.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 3 13:40:02 PDT 2022


Author: Amara Emerson
Date: 2022-10-03T21:39:52+01:00
New Revision: 07ccf651b95b3d92904e5d513772ad51567d8334

URL: https://github.com/llvm/llvm-project/commit/07ccf651b95b3d92904e5d513772ad51567d8334
DIFF: https://github.com/llvm/llvm-project/commit/07ccf651b95b3d92904e5d513772ad51567d8334.diff

LOG: [AArch64][GlobalISel] Enable vector support for G_SELECT->G_FMAXIMUM/MINIMUM.

Vector support seems to work immediately, as long as we run the combine before
legalization (so the vector SELECTs don't get lowered) and the legalizer rules
are there to enable generation.

Differential Revision: https://reviews.llvm.org/D135047

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll

Modified: 
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir
    llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index fcf6418b62cc1..4cbaa66ae3548 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1031,7 +1031,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     form_bitfield_extract, constant_fold, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg]>;
+    sub_add_reg, select_to_minmax]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and

diff  --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 83d14ba6b191a..1fdc0d264bc7a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5985,8 +5985,7 @@ bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
   // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
   LLT DstTy = MRI.getType(Dst);
   // Bail out early on pointers, since we'll never want to fold to a min/max.
-  // TODO: Handle vectors.
-  if (DstTy.isPointer() || DstTy.isVector())
+  if (DstTy.isPointer())
     return false;
   // Match a floating point compare with a less-than/greater-than predicate.
   // TODO: Allow multiple users of the compare if they are all selects.

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 541d4c669b522..d7448e4df146d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -803,10 +803,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .libcallFor({s128})
       .minScalar(0, MinFPScalar);
 
-  // TODO: Vector types.
   getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
-      .legalFor({MinFPScalar, s32, s64})
-      .minScalar(0, MinFPScalar);
+      .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
+      .legalIf([=](const LegalityQuery &Query) {
+        const auto &Ty = Query.Types[0];
+        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
+      })
+      .minScalar(0, MinFPScalar)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64);
 
   // TODO: Libcall support for s128.
   // TODO: s16 should be legal with full FP16 support.

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir
index ae396768b5b25..0162898ae03cf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir
@@ -9,13 +9,17 @@ body:             |
   bb.0:
     liveins: $h0, $h1
     ; FP16-LABEL: name: s16_legal_with_full_fp16
-    ; FP16: %a:_(s16) = COPY $h0
+    ; FP16: liveins: $h0, $h1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s16) = COPY $h0
     ; FP16-NEXT: %b:_(s16) = COPY $h1
     ; FP16-NEXT: %legalize_me:_(s16) = G_FMAXIMUM %a, %b
     ; FP16-NEXT: $h0 = COPY %legalize_me(s16)
     ; FP16-NEXT: RET_ReallyLR implicit $h0
     ; NO-FP16-LABEL: name: s16_legal_with_full_fp16
-    ; NO-FP16: %a:_(s16) = COPY $h0
+    ; NO-FP16: liveins: $h0, $h1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s16) = COPY $h0
     ; NO-FP16-NEXT: %b:_(s16) = COPY $h1
     ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16)
     ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16)
@@ -37,13 +41,17 @@ body:             |
   bb.0:
     liveins: $s0, $s1
     ; FP16-LABEL: name: s32_legal
-    ; FP16: %a:_(s32) = COPY $s0
+    ; FP16: liveins: $s0, $s1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s32) = COPY $s0
     ; FP16-NEXT: %b:_(s32) = COPY $s1
     ; FP16-NEXT: %legalize_me:_(s32) = G_FMAXIMUM %a, %b
     ; FP16-NEXT: $s0 = COPY %legalize_me(s32)
     ; FP16-NEXT: RET_ReallyLR implicit $s0
     ; NO-FP16-LABEL: name: s32_legal
-    ; NO-FP16: %a:_(s32) = COPY $s0
+    ; NO-FP16: liveins: $s0, $s1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s32) = COPY $s0
     ; NO-FP16-NEXT: %b:_(s32) = COPY $s1
     ; NO-FP16-NEXT: %legalize_me:_(s32) = G_FMAXIMUM %a, %b
     ; NO-FP16-NEXT: $s0 = COPY %legalize_me(s32)
@@ -62,13 +70,17 @@ body:             |
   bb.0:
     liveins: $d0, $d1
     ; FP16-LABEL: name: s64_legal
-    ; FP16: %a:_(s64) = COPY $d0
+    ; FP16: liveins: $d0, $d1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s64) = COPY $d0
     ; FP16-NEXT: %b:_(s64) = COPY $d1
     ; FP16-NEXT: %legalize_me:_(s64) = G_FMAXIMUM %a, %b
     ; FP16-NEXT: $d0 = COPY %legalize_me(s64)
     ; FP16-NEXT: RET_ReallyLR implicit $d0
     ; NO-FP16-LABEL: name: s64_legal
-    ; NO-FP16: %a:_(s64) = COPY $d0
+    ; NO-FP16: liveins: $d0, $d1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s64) = COPY $d0
     ; NO-FP16-NEXT: %b:_(s64) = COPY $d1
     ; NO-FP16-NEXT: %legalize_me:_(s64) = G_FMAXIMUM %a, %b
     ; NO-FP16-NEXT: $d0 = COPY %legalize_me(s64)
@@ -78,3 +90,62 @@ body:             |
     %legalize_me:_(s64) = G_FMAXIMUM %a, %b
     $d0 = COPY %legalize_me(s64)
     RET_ReallyLR implicit $d0
+...
+---
+name:            v2s32
+alignment:       4
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+    ; FP16-LABEL: name: v2s32
+    ; FP16: liveins: $d0, $d1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(<2 x s32>) = COPY $d0
+    ; FP16-NEXT: %b:_(<2 x s32>) = COPY $d1
+    ; FP16-NEXT: %maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
+    ; FP16-NEXT: $d0 = COPY %maximum(<2 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $d0
+    ; NO-FP16-LABEL: name: v2s32
+    ; NO-FP16: liveins: $d0, $d1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(<2 x s32>) = COPY $d0
+    ; NO-FP16-NEXT: %b:_(<2 x s32>) = COPY $d1
+    ; NO-FP16-NEXT: %maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
+    ; NO-FP16-NEXT: $d0 = COPY %maximum(<2 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $d0
+    %a:_(<2 x s32>) = COPY $d0
+    %b:_(<2 x s32>) = COPY $d1
+    %maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
+    $d0 = COPY %maximum(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            v4s32
+alignment:       4
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; FP16-LABEL: name: v4s32
+    ; FP16: liveins: $q0, $q1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; FP16-NEXT: %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
+    ; FP16-NEXT: $q0 = COPY %maximum(<4 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
+    ; NO-FP16-LABEL: name: v4s32
+    ; NO-FP16: liveins: $q0, $q1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; NO-FP16-NEXT: %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
+    ; NO-FP16-NEXT: $q0 = COPY %maximum(<4 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    %a:_(<4 x s32>) = COPY $q0
+    %b:_(<4 x s32>) = COPY $q1
+    %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
+    $q0 = COPY %maximum(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir
index 47137b621fee1..1c3c8bbcece61 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir
@@ -9,13 +9,17 @@ body:             |
   bb.0:
     liveins: $h0, $h1
     ; FP16-LABEL: name: s16_legal_with_full_fp16
-    ; FP16: %a:_(s16) = COPY $h0
+    ; FP16: liveins: $h0, $h1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s16) = COPY $h0
     ; FP16-NEXT: %b:_(s16) = COPY $h1
     ; FP16-NEXT: %legalize_me:_(s16) = G_FMINIMUM %a, %b
     ; FP16-NEXT: $h0 = COPY %legalize_me(s16)
     ; FP16-NEXT: RET_ReallyLR implicit $h0
     ; NO-FP16-LABEL: name: s16_legal_with_full_fp16
-    ; NO-FP16: %a:_(s16) = COPY $h0
+    ; NO-FP16: liveins: $h0, $h1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s16) = COPY $h0
     ; NO-FP16-NEXT: %b:_(s16) = COPY $h1
     ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16)
     ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16)
@@ -37,13 +41,17 @@ body:             |
   bb.0:
     liveins: $s0, $s1
     ; FP16-LABEL: name: s32_legal
-    ; FP16: %a:_(s32) = COPY $s0
+    ; FP16: liveins: $s0, $s1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s32) = COPY $s0
     ; FP16-NEXT: %b:_(s32) = COPY $s1
     ; FP16-NEXT: %legalize_me:_(s32) = G_FMINIMUM %a, %b
     ; FP16-NEXT: $s0 = COPY %legalize_me(s32)
     ; FP16-NEXT: RET_ReallyLR implicit $s0
     ; NO-FP16-LABEL: name: s32_legal
-    ; NO-FP16: %a:_(s32) = COPY $s0
+    ; NO-FP16: liveins: $s0, $s1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s32) = COPY $s0
     ; NO-FP16-NEXT: %b:_(s32) = COPY $s1
     ; NO-FP16-NEXT: %legalize_me:_(s32) = G_FMINIMUM %a, %b
     ; NO-FP16-NEXT: $s0 = COPY %legalize_me(s32)
@@ -62,13 +70,17 @@ body:             |
   bb.0:
     liveins: $d0, $d1
     ; FP16-LABEL: name: s64_legal
-    ; FP16: %a:_(s64) = COPY $d0
+    ; FP16: liveins: $d0, $d1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s64) = COPY $d0
     ; FP16-NEXT: %b:_(s64) = COPY $d1
     ; FP16-NEXT: %legalize_me:_(s64) = G_FMINIMUM %a, %b
     ; FP16-NEXT: $d0 = COPY %legalize_me(s64)
     ; FP16-NEXT: RET_ReallyLR implicit $d0
     ; NO-FP16-LABEL: name: s64_legal
-    ; NO-FP16: %a:_(s64) = COPY $d0
+    ; NO-FP16: liveins: $d0, $d1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s64) = COPY $d0
     ; NO-FP16-NEXT: %b:_(s64) = COPY $d1
     ; NO-FP16-NEXT: %legalize_me:_(s64) = G_FMINIMUM %a, %b
     ; NO-FP16-NEXT: $d0 = COPY %legalize_me(s64)
@@ -78,3 +90,77 @@ body:             |
     %legalize_me:_(s64) = G_FMINIMUM %a, %b
     $d0 = COPY %legalize_me(s64)
     RET_ReallyLR implicit $d0
+...
+---
+name:            v4s32
+alignment:       4
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; FP16-LABEL: name: v4s32
+    ; FP16: liveins: $q0, $q1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; FP16-NEXT: %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
+    ; FP16-NEXT: $q0 = COPY %minimum(<4 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
+    ; NO-FP16-LABEL: name: v4s32
+    ; NO-FP16: liveins: $q0, $q1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; NO-FP16-NEXT: %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
+    ; NO-FP16-NEXT: $q0 = COPY %minimum(<4 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    %a:_(<4 x s32>) = COPY $q0
+    %b:_(<4 x s32>) = COPY $q1
+    %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
+    $q0 = COPY %minimum(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+
+---
+name:            v8s32
+alignment:       4
+body:             |
+  bb.0:
+    liveins: $q0, $q1, $q2, $q3
+    ; FP16-LABEL: name: v8s32
+    ; FP16: liveins: $q0, $q1, $q2, $q3
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; FP16-NEXT: %c:_(<4 x s32>) = COPY $q2
+    ; FP16-NEXT: %d:_(<4 x s32>) = COPY $q3
+    ; FP16-NEXT: [[FMINIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %a, %c
+    ; FP16-NEXT: [[FMINIMUM1:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %b, %d
+    ; FP16-NEXT: $q0 = COPY [[FMINIMUM]](<4 x s32>)
+    ; FP16-NEXT: $q1 = COPY [[FMINIMUM1]](<4 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
+    ; NO-FP16-LABEL: name: v8s32
+    ; NO-FP16: liveins: $q0, $q1, $q2, $q3
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; NO-FP16-NEXT: %c:_(<4 x s32>) = COPY $q2
+    ; NO-FP16-NEXT: %d:_(<4 x s32>) = COPY $q3
+    ; NO-FP16-NEXT: [[FMINIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %a, %c
+    ; NO-FP16-NEXT: [[FMINIMUM1:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %b, %d
+    ; NO-FP16-NEXT: $q0 = COPY [[FMINIMUM]](<4 x s32>)
+    ; NO-FP16-NEXT: $q1 = COPY [[FMINIMUM1]](<4 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    %a:_(<4 x s32>) = COPY $q0
+    %b:_(<4 x s32>) = COPY $q1
+    %c:_(<4 x s32>) = COPY $q2
+    %d:_(<4 x s32>) = COPY $q3
+    %v1:_(<8 x s32>) = G_CONCAT_VECTORS %a, %b
+    %v2:_(<8 x s32>) = G_CONCAT_VECTORS %c, %d
+    %minimum:_(<8 x s32>) = G_FMINIMUM %v1, %v2
+    %uv1:_(<4 x s32>), %uv2:_(<4 x s32>) = G_UNMERGE_VALUES %minimum
+    $q0 = COPY %uv1(<4 x s32>)
+    $q1 = COPY %uv2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 4be1cf2e0ab3d..47097af55a00e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -510,11 +510,11 @@
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_FMINIMUM (opcode {{[0-9]+}}): 1 type index
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
-# DEBUG-NEXT: .. the first uncovered type index: 1, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FMAXIMUM (opcode {{[0-9]+}}): 1 type index
-# DEBUG-NEXT: .. the first uncovered type index: 1, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir
new file mode 100644
index 0000000000000..8c4300d9e7329
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir
@@ -0,0 +1,188 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -mattr=+fullfp16 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+---
+name:            test_s16
+body:             |
+  bb.0:
+    liveins: $h0
+
+    ; CHECK-LABEL: name: test_s16
+    ; CHECK: liveins: $h0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[C]], [[COPY]]
+    ; CHECK-NEXT: $h0 = COPY [[FMAXIMUM]](s16)
+    ; CHECK-NEXT: RET_ReallyLR implicit $h0
+    %0:_(s16) = COPY $h0
+    %1:_(s16) = G_FCONSTANT half 0xH0000
+    %2:_(s1) = G_FCMP floatpred(olt), %0(s16), %1
+    %3:_(s16) = G_SELECT %2(s1), %1, %0
+    $h0 = COPY %3(s16)
+    RET_ReallyLR implicit $h0
+
+...
+---
+name:            test_s32
+body:             |
+  bb.0:
+    liveins: $s0
+
+    ; CHECK-LABEL: name: test_s32
+    ; CHECK: liveins: $s0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[C]], [[COPY]]
+    ; CHECK-NEXT: $s0 = COPY [[FMAXIMUM]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $s0
+    %0:_(s32) = COPY $s0
+    %1:_(s32) = G_FCONSTANT float 0.000000e+00
+    %2:_(s1) = G_FCMP floatpred(olt), %0(s32), %1
+    %3:_(s32) = G_SELECT %2(s1), %1, %0
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            test_s64
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_s64
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[C]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[FMAXIMUM]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(s64) = COPY $d0
+    %1:_(s64) = G_FCONSTANT double 0.000000e+00
+    %2:_(s1) = G_FCMP floatpred(olt), %0(s64), %1
+    %3:_(s64) = G_SELECT %2(s1), %1, %0
+    $d0 = COPY %3(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_s64_fmin
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_s64_fmin
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[C]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[FMINIMUM]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(s64) = COPY $d0
+    %1:_(s64) = G_FCONSTANT double 0.000000e+00
+    %2:_(s1) = G_FCMP floatpred(ogt), %0(s64), %1
+    %3:_(s64) = G_SELECT %2(s1), %1, %0
+    $d0 = COPY %3(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_v8s16
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v8s16
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<8 x s16>) = COPY $q0
+    %2:_(s16) = G_FCONSTANT half 0xH0000
+    %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16)
+    %3:_(<8 x s1>) = G_FCMP floatpred(olt), %0(<8 x s16>), %1
+    %4:_(<8 x s16>) = G_SELECT %3(<8 x s1>), %1, %0
+    $q0 = COPY %4(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v4s32
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v4s32
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BUILD_VECTOR]], [[BITCAST]]
+    ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %1:_(<2 x s64>) = COPY $q0
+    %0:_(<4 x s32>) = G_BITCAST %1(<2 x s64>)
+    %3:_(s32) = G_FCONSTANT float 0.000000e+00
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32), %3(s32), %3(s32)
+    %4:_(<4 x s1>) = G_FCMP floatpred(olt), %0(<4 x s32>), %2
+    %5:_(<4 x s32>) = G_SELECT %4(<4 x s1>), %2, %0
+    $q0 = COPY %5(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v2s64
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v2s64
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(s64) = G_FCONSTANT double 0.000000e+00
+    %1:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %2(s64)
+    %3:_(<2 x s1>) = G_FCMP floatpred(olt), %0(<2 x s64>), %1
+    %4:_(<2 x s64>) = G_SELECT %3(<2 x s1>), %1, %0
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v2s64_fmin
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v2s64_fmin
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[BUILD_VECTOR]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[FMINIMUM]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(s64) = G_FCONSTANT double 0.000000e+00
+    %1:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %2(s64)
+    %3:_(<2 x s1>) = G_FCMP floatpred(ogt), %0(<2 x s64>), %1
+    %4:_(<2 x s64>) = G_SELECT %3(<2 x s1>), %1, %0
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll
new file mode 100644
index 0000000000000..1986e79b3cec9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s
+define half @test_s16(half %a) #0 {
+; CHECK-LABEL: test_s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    fmax h0, h1, h0
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt half %a, 0.0
+  %sel = select i1 %fcmp, half 0.0, half %a
+  ret half %sel
+}
+
+define float @test_s32(float %a) #0 {
+; CHECK-LABEL: test_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    fmax s0, s1, s0
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt float %a, 0.0
+  %sel = select i1 %fcmp, float 0.0, float %a
+  ret float %sel
+}
+
+define double @test_s64(double %a) #0 {
+; CHECK-LABEL: test_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    fmax d0, d1, d0
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt double %a, 0.0
+  %sel = select i1 %fcmp, double 0.0, double %a
+  ret double %sel
+}
+
+define <4 x half> @test_v4s16(<4 x half> %a) #0 {
+; CHECK-LABEL: test_v4s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.4h, v1.h[0]
+; CHECK-NEXT:    fmax v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <4 x half> %a, zeroinitializer
+  %sel = select <4 x i1> %fcmp, <4 x half> zeroinitializer, <4 x half> %a
+  ret <4 x half> %sel
+}
+
+define <8 x half> @test_v8s16(<8 x half> %a) #0 {
+; CHECK-LABEL: test_v8s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.8h, v1.h[0]
+; CHECK-NEXT:    fmax v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <8 x half> %a, zeroinitializer
+  %sel = select <8 x i1> %fcmp, <8 x half> zeroinitializer, <8 x half> %a
+  ret <8 x half> %sel
+}
+
+define <2 x float> @test_v2s32(<2 x float> %a) #0 {
+; CHECK-LABEL: test_v2s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.2s, v1.s[0]
+; CHECK-NEXT:    fmax v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <2 x float> %a, zeroinitializer
+  %sel = select <2 x i1> %fcmp, <2 x float> zeroinitializer, <2 x float> %a
+  ret <2 x float> %sel
+}
+
+define <4 x float> @test_v4s32(<4 x float> %a) #0 {
+; CHECK-LABEL: test_v4s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.4s, v1.s[0]
+; CHECK-NEXT:    fmax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <4 x float> %a, zeroinitializer
+  %sel = select <4 x i1> %fcmp, <4 x float> zeroinitializer, <4 x float> %a
+  ret <4 x float> %sel
+}
+
+define <2 x double> @test_v2s64(<2 x double> %a) #0 {
+; CHECK-LABEL: test_v2s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.2d, v1.d[0]
+; CHECK-NEXT:    fmax v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <2 x double> %a, zeroinitializer
+  %sel = select <2 x i1> %fcmp, <2 x double> zeroinitializer, <2 x double> %a
+  ret <2 x double> %sel
+}
+


        


More information about the llvm-commits mailing list