[llvm] c02aa53 - [AArch64][SVE] Add "fast" fcmp operations.

Fri Jul 24 13:23:57 PDT 2020

Author: Eli Friedman
Date: 2020-07-24T13:22:41-07:00
New Revision: c02aa53ecb25189bfdecd852a251e1c17ed0ee24

URL: https://github.com/llvm/llvm-project/commit/c02aa53ecb25189bfdecd852a251e1c17ed0ee24
DIFF: https://github.com/llvm/llvm-project/commit/c02aa53ecb25189bfdecd852a251e1c17ed0ee24.diff

LOG: [AArch64][SVE] Add "fast" fcmp operations.

dacf8d3 added support for most fcmp operations, but there are some extra
variations I hadn't considered: SelectionDAG supports float comparisons
that are neither ordered nor unordered. Add support for the missing
operations.

Differential Revision: https://reviews.llvm.org/D84460

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-fcmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cb5530077fdd..f080abd8e627 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -230,7 +230,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
            MVT::nxv2f64 }) {
       setCondCodeAction(ISD::SETO, VT, Expand);
       setCondCodeAction(ISD::SETOLT, VT, Expand);
+      setCondCodeAction(ISD::SETLT, VT, Expand);
       setCondCodeAction(ISD::SETOLE, VT, Expand);
+      setCondCodeAction(ISD::SETLE, VT, Expand);
       setCondCodeAction(ISD::SETULT, VT, Expand);
       setCondCodeAction(ISD::SETULE, VT, Expand);
       setCondCodeAction(ISD::SETUGE, VT, Expand);

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index dc501a9536b9..7c39268a4441 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -210,6 +210,19 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
 
 def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
 
+def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
+                               [(setoge node:$lhs, node:$rhs),
+                                (setge node:$lhs, node:$rhs)]>;
+def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
+                                [(setogt node:$lhs, node:$rhs),
+                                 (setgt node:$lhs, node:$rhs)]>;
+def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
+                                [(setoeq node:$lhs, node:$rhs),
+                                 (seteq node:$lhs, node:$rhs)]>;
+def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
+                                [(setone node:$lhs, node:$rhs),
+                                 (setne node:$lhs, node:$rhs)]>;
+
 let Predicates = [HasSVE] in {
   defm RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
   def  RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
@@ -1172,10 +1185,10 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
   defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
 
-  defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge>;
-  defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt>;
-  defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq>;
-  defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone>;
+  defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
+  defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
+  defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
+  defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
   defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
   defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
   defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;

diff  --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
index 86fff734f188..3bb3627e2393 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -257,3 +257,58 @@ define <vscale x 4 x i32> @oeq_4f32_zext(<vscale x 4 x float> %x, <vscale x 4 x
   %r = zext <vscale x 4 x i1> %y to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %r
 }
+
+define <vscale x 4 x i1> @eq_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: eq_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp fast oeq <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @gt_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: gt_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp fast ogt <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ge_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ge_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp fast oge <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @lt_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: lt_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    ret
+  %y = fcmp fast olt <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @le_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: le_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    ret
+  %y = fcmp fast ole <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ne_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ne_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp fast one <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}