[llvm] r323118 - [AArch64] optimise v4f16 fcmps to utilise vector instructions

Carey Williams via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 22 06:16:11 PST 2018


Author: carwil
Date: Mon Jan 22 06:16:11 2018
New Revision: 323118

URL: http://llvm.org/viewvc/llvm-project?rev=323118&view=rev
Log:
[AArch64] optimise v4f16 fcmps to utilise vector instructions

Improves the code generation for v4f16 FCMP instructions when FullFP16 is not supported.
Generating FCVTL(s) rather than a longer series of FCVTs.

Differential Revision: https://reviews.llvm.org/D41772

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=323118&r1=323117&r2=323118&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Jan 22 06:16:11 2018
@@ -7298,8 +7298,21 @@ SDValue AArch64TargetLowering::LowerVSET
     return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
   }
 
-  if (LHS.getValueType().getVectorElementType() == MVT::f16)
-    return SDValue();
+  const bool FullFP16 =
+    static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+
+  // Make v4f16 (only) fcmp operations utilise vector instructions
+  // v8f16 support will be a little more complicated
+  if (LHS.getValueType().getVectorElementType() == MVT::f16) {
+    if (!FullFP16 && LHS.getValueType().getVectorNumElements() == 4) {
+      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
+      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
+      SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
+      DAG.ReplaceAllUsesWith(Op, NewSetcc);
+      CmpVT = MVT::v4i32;
+    } else
+      return SDValue();
+  }
 
   assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
          LHS.getValueType().getVectorElementType() == MVT::f64);

Modified: llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll?rev=323118&r1=323117&r2=323118&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll Mon Jan 22 06:16:11 2018
@@ -295,18 +295,12 @@ define <4 x i16> @fptoui_i16(<4 x half>
 
 define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_une:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, ne
-; CHECK-CVT-DAG: csetm {{.*}}, ne
-; CHECK-CVT-DAG: csetm {{.*}}, ne
-; CHECK-CVT-DAG: csetm {{.*}}, ne
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmeq
+; CHECK-CVT: mvn
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_une:
 ; CHECK-FP16-NOT:   fcvt
@@ -325,22 +319,14 @@ define <4 x i1> @test_fcmp_une(<4 x half
 
 define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_ueq:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], eq
-; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], eq
-; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], eq
-; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], eq
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, vc
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG2]], wzr, vc
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, vc
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, vc
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: fcmgt
+; CHECK-CVT: orr
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_ueq:
 ; CHECK-FP16-NOT:   fcvt
@@ -359,18 +345,12 @@ define <4 x i1> @test_fcmp_ueq(<4 x half
 
 define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_ugt:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, hi
-; CHECK-CVT-DAG: csetm {{.*}}, hi
-; CHECK-CVT-DAG: csetm {{.*}}, hi
-; CHECK-CVT-DAG: csetm {{.*}}, hi
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_ugt:
 ; CHECK-FP16-NOT:   fcvt
@@ -389,18 +369,12 @@ define <4 x i1> @test_fcmp_ugt(<4 x half
 
 define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_uge:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, pl
-; CHECK-CVT-DAG: csetm {{.*}}, pl
-; CHECK-CVT-DAG: csetm {{.*}}, pl
-; CHECK-CVT-DAG: csetm {{.*}}, pl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_uge:
 ; CHECK-FP16-NOT:   fcvt
@@ -419,18 +393,12 @@ define <4 x i1> @test_fcmp_uge(<4 x half
 
 define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_ult:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, lt
-; CHECK-CVT-DAG: csetm {{.*}}, lt
-; CHECK-CVT-DAG: csetm {{.*}}, lt
-; CHECK-CVT-DAG: csetm {{.*}}, lt
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_ult:
 ; CHECK-FP16-NOT:   fcvt
@@ -449,18 +417,12 @@ define <4 x i1> @test_fcmp_ult(<4 x half
 
 define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_ule:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, le
-; CHECK-CVT-DAG: csetm {{.*}}, le
-; CHECK-CVT-DAG: csetm {{.*}}, le
-; CHECK-CVT-DAG: csetm {{.*}}, le
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_ule:
 ; CHECK-FP16-NOT:   fcvt
@@ -479,18 +441,14 @@ define <4 x i1> @test_fcmp_ule(<4 x half
 
 define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_uno:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, vs
-; CHECK-CVT-DAG: csetm {{.*}}, vs
-; CHECK-CVT-DAG: csetm {{.*}}, vs
-; CHECK-CVT-DAG: csetm {{.*}}, vs
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: fcmgt
+; CHECK-CVT: orr
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_uno:
 ; CHECK-FP16-NOT:   fcvt
@@ -509,22 +467,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half
 
 define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_one:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], mi
-; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], mi
-; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], mi
-; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], mi
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, le
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG2]], wzr, le
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, le
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, le
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: fcmgt
+; CHECK-CVT: orr
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_one:
 ; CHECK-FP16-NOT:   fcvt
@@ -543,18 +492,11 @@ define <4 x i1> @test_fcmp_one(<4 x half
 
 define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_oeq:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, eq
-; CHECK-CVT-DAG: csetm {{.*}}, eq
-; CHECK-CVT-DAG: csetm {{.*}}, eq
-; CHECK-CVT-DAG: csetm {{.*}}, eq
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmeq
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_oeq:
 ; CHECK-FP16-NOT:   fcvt
@@ -573,18 +515,11 @@ define <4 x i1> @test_fcmp_oeq(<4 x half
 
 define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_ogt:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, gt
-; CHECK-CVT-DAG: csetm {{.*}}, gt
-; CHECK-CVT-DAG: csetm {{.*}}, gt
-; CHECK-CVT-DAG: csetm {{.*}}, gt
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_ogt:
 ; CHECK-FP16-NOT:   fcvt
@@ -603,18 +538,11 @@ define <4 x i1> @test_fcmp_ogt(<4 x half
 
 define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_oge:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, ge
-; CHECK-CVT-DAG: csetm {{.*}}, ge
-; CHECK-CVT-DAG: csetm {{.*}}, ge
-; CHECK-CVT-DAG: csetm {{.*}}, ge
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_oge:
 ; CHECK-FP16-NOT:   fcvt
@@ -633,18 +561,11 @@ define <4 x i1> @test_fcmp_oge(<4 x half
 
 define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_olt:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, mi
-; CHECK-CVT-DAG: csetm {{.*}}, mi
-; CHECK-CVT-DAG: csetm {{.*}}, mi
-; CHECK-CVT-DAG: csetm {{.*}}, mi
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_olt:
 ; CHECK-FP16-NOT:   fcvt
@@ -663,18 +584,11 @@ define <4 x i1> @test_fcmp_olt(<4 x half
 
 define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_ole:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, ls
-; CHECK-CVT-DAG: csetm {{.*}}, ls
-; CHECK-CVT-DAG: csetm {{.*}}, ls
-; CHECK-CVT-DAG: csetm {{.*}}, ls
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_ole:
 ; CHECK-FP16-NOT:   fcvt
@@ -693,18 +607,13 @@ define <4 x i1> @test_fcmp_ole(<4 x half
 
 define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-LABEL: test_fcmp_ord:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, vc
-; CHECK-CVT-DAG: csetm {{.*}}, vc
-; CHECK-CVT-DAG: csetm {{.*}}, vc
-; CHECK-CVT-DAG: csetm {{.*}}, vc
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: fcmgt
+; CHECK-CVT: orr
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
 
 ; CHECK-FP16-LABEL: test_fcmp_ord:
 ; CHECK-FP16-NOT:   fcvt




More information about the llvm-commits mailing list