[llvm] r323118 - [AArch64] optimise v4f16 fcmps to utilise vector instructions
Carey Williams via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 22 06:16:11 PST 2018
Author: carwil
Date: Mon Jan 22 06:16:11 2018
New Revision: 323118
URL: http://llvm.org/viewvc/llvm-project?rev=323118&view=rev
Log:
[AArch64] optimise v4f16 fcmps to utilise vector instructions
Improves the code generation for v4f16 FCMP instructions when FullFP16 is not supported.
This generates FCVTL(s) rather than a longer series of FCVTs.
Differential Revision: https://reviews.llvm.org/D41772
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=323118&r1=323117&r2=323118&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Jan 22 06:16:11 2018
@@ -7298,8 +7298,21 @@ SDValue AArch64TargetLowering::LowerVSET
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
- if (LHS.getValueType().getVectorElementType() == MVT::f16)
- return SDValue();
+ const bool FullFP16 =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+
+ // Make v4f16 (only) fcmp operations utilise vector instructions
+ // v8f16 support will be a little more complicated
+ if (LHS.getValueType().getVectorElementType() == MVT::f16) {
+ if (!FullFP16 && LHS.getValueType().getVectorNumElements() == 4) {
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
+ RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
+ SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
+ DAG.ReplaceAllUsesWith(Op, NewSetcc);
+ CmpVT = MVT::v4i32;
+ } else
+ return SDValue();
+ }
assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
LHS.getValueType().getVectorElementType() == MVT::f64);
Modified: llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll?rev=323118&r1=323117&r2=323118&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll Mon Jan 22 06:16:11 2018
@@ -295,18 +295,12 @@ define <4 x i16> @fptoui_i16(<4 x half>
define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_une:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, ne
-; CHECK-CVT-DAG: csetm {{.*}}, ne
-; CHECK-CVT-DAG: csetm {{.*}}, ne
-; CHECK-CVT-DAG: csetm {{.*}}, ne
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmeq
+; CHECK-CVT: mvn
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_une:
; CHECK-FP16-NOT: fcvt
@@ -325,22 +319,14 @@ define <4 x i1> @test_fcmp_une(<4 x half
define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_ueq:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], eq
-; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], eq
-; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], eq
-; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], eq
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, vc
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG2]], wzr, vc
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, vc
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, vc
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: fcmgt
+; CHECK-CVT: orr
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_ueq:
; CHECK-FP16-NOT: fcvt
@@ -359,18 +345,12 @@ define <4 x i1> @test_fcmp_ueq(<4 x half
define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_ugt:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, hi
-; CHECK-CVT-DAG: csetm {{.*}}, hi
-; CHECK-CVT-DAG: csetm {{.*}}, hi
-; CHECK-CVT-DAG: csetm {{.*}}, hi
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_ugt:
; CHECK-FP16-NOT: fcvt
@@ -389,18 +369,12 @@ define <4 x i1> @test_fcmp_ugt(<4 x half
define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_uge:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, pl
-; CHECK-CVT-DAG: csetm {{.*}}, pl
-; CHECK-CVT-DAG: csetm {{.*}}, pl
-; CHECK-CVT-DAG: csetm {{.*}}, pl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_uge:
; CHECK-FP16-NOT: fcvt
@@ -419,18 +393,12 @@ define <4 x i1> @test_fcmp_uge(<4 x half
define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_ult:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, lt
-; CHECK-CVT-DAG: csetm {{.*}}, lt
-; CHECK-CVT-DAG: csetm {{.*}}, lt
-; CHECK-CVT-DAG: csetm {{.*}}, lt
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_ult:
; CHECK-FP16-NOT: fcvt
@@ -449,18 +417,12 @@ define <4 x i1> @test_fcmp_ult(<4 x half
define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_ule:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, le
-; CHECK-CVT-DAG: csetm {{.*}}, le
-; CHECK-CVT-DAG: csetm {{.*}}, le
-; CHECK-CVT-DAG: csetm {{.*}}, le
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_ule:
; CHECK-FP16-NOT: fcvt
@@ -479,18 +441,14 @@ define <4 x i1> @test_fcmp_ule(<4 x half
define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_uno:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, vs
-; CHECK-CVT-DAG: csetm {{.*}}, vs
-; CHECK-CVT-DAG: csetm {{.*}}, vs
-; CHECK-CVT-DAG: csetm {{.*}}, vs
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: fcmgt
+; CHECK-CVT: orr
+; CHECK-CVT: xtn
+; CHECK-CVT: mvn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_uno:
; CHECK-FP16-NOT: fcvt
@@ -509,22 +467,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half
define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_one:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], mi
-; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], mi
-; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], mi
-; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], mi
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, le
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG2]], wzr, le
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, le
-; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, le
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: fcmgt
+; CHECK-CVT: orr
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_one:
; CHECK-FP16-NOT: fcvt
@@ -543,18 +492,11 @@ define <4 x i1> @test_fcmp_one(<4 x half
define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_oeq:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, eq
-; CHECK-CVT-DAG: csetm {{.*}}, eq
-; CHECK-CVT-DAG: csetm {{.*}}, eq
-; CHECK-CVT-DAG: csetm {{.*}}, eq
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmeq
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_oeq:
; CHECK-FP16-NOT: fcvt
@@ -573,18 +515,11 @@ define <4 x i1> @test_fcmp_oeq(<4 x half
define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_ogt:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, gt
-; CHECK-CVT-DAG: csetm {{.*}}, gt
-; CHECK-CVT-DAG: csetm {{.*}}, gt
-; CHECK-CVT-DAG: csetm {{.*}}, gt
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_ogt:
; CHECK-FP16-NOT: fcvt
@@ -603,18 +538,11 @@ define <4 x i1> @test_fcmp_ogt(<4 x half
define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_oge:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, ge
-; CHECK-CVT-DAG: csetm {{.*}}, ge
-; CHECK-CVT-DAG: csetm {{.*}}, ge
-; CHECK-CVT-DAG: csetm {{.*}}, ge
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_oge:
; CHECK-FP16-NOT: fcvt
@@ -633,18 +561,11 @@ define <4 x i1> @test_fcmp_oge(<4 x half
define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_olt:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, mi
-; CHECK-CVT-DAG: csetm {{.*}}, mi
-; CHECK-CVT-DAG: csetm {{.*}}, mi
-; CHECK-CVT-DAG: csetm {{.*}}, mi
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmgt
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_olt:
; CHECK-FP16-NOT: fcvt
@@ -663,18 +584,11 @@ define <4 x i1> @test_fcmp_olt(<4 x half
define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_ole:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, ls
-; CHECK-CVT-DAG: csetm {{.*}}, ls
-; CHECK-CVT-DAG: csetm {{.*}}, ls
-; CHECK-CVT-DAG: csetm {{.*}}, ls
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_ole:
; CHECK-FP16-NOT: fcvt
@@ -693,18 +607,13 @@ define <4 x i1> @test_fcmp_ole(<4 x half
define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-LABEL: test_fcmp_ord:
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: fcvt
-; CHECK-CVT-DAG: csetm {{.*}}, vc
-; CHECK-CVT-DAG: csetm {{.*}}, vc
-; CHECK-CVT-DAG: csetm {{.*}}, vc
-; CHECK-CVT-DAG: csetm {{.*}}, vc
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcvtl
+; CHECK-CVT: fcmge
+; CHECK-CVT: fcmgt
+; CHECK-CVT: orr
+; CHECK-CVT: xtn
+; CHECK-CVT: ret
; CHECK-FP16-LABEL: test_fcmp_ord:
; CHECK-FP16-NOT: fcvt
More information about the llvm-commits
mailing list