[llvm] r355545 - [AArch64] Improve FP16 instruction selection for vector round and vector convert from half instructions
Abderrazek Zaafrani via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 6 12:30:06 PST 2019
Author: az
Date: Wed Mar 6 12:30:06 2019
New Revision: 355545
URL: http://llvm.org/viewvc/llvm-project?rev=355545&view=rev
Log:
[AArch64] Improve FP16 instruction selection for vector round and vector convert from half instructions
https://reviews.llvm.org/D58855
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll
llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=355545&r1=355544&r2=355545&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Wed Mar 6 12:30:06 2019
@@ -748,6 +748,17 @@ AArch64TargetLowering::AArch64TargetLowe
setOperationAction(ISD::FROUND, Ty, Legal);
}
+ if (Subtarget->hasFullFP16()) {
+ for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
+ setOperationAction(ISD::FFLOOR, Ty, Legal);
+ setOperationAction(ISD::FNEARBYINT, Ty, Legal);
+ setOperationAction(ISD::FCEIL, Ty, Legal);
+ setOperationAction(ISD::FRINT, Ty, Legal);
+ setOperationAction(ISD::FTRUNC, Ty, Legal);
+ setOperationAction(ISD::FROUND, Ty, Legal);
+ }
+ }
+
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
}
@@ -2329,7 +2340,8 @@ SDValue AArch64TargetLowering::LowerFP_R
SDLoc(Op)).first;
}
-static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
+ SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
@@ -2337,8 +2349,9 @@ static SDValue LowerVectorFP_TO_INT(SDVa
EVT VT = Op.getValueType();
unsigned NumElts = InVT.getVectorNumElements();
- // f16 vectors are promoted to f32 before a conversion.
- if (InVT.getVectorElementType() == MVT::f16) {
+ // f16 conversions are promoted to f32 when full fp16 is not supported.
+ if (InVT.getVectorElementType() == MVT::f16 &&
+ !Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
return DAG.getNode(
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=355545&r1=355544&r2=355545&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Wed Mar 6 12:30:06 2019
@@ -656,6 +656,7 @@ private:
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll?rev=355545&r1=355544&r2=355545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll Wed Mar 6 12:30:06 2019
@@ -200,6 +200,15 @@ define %v4f16 @test_v4f16.nearbyint(%v4f
%1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a)
ret %v4f16 %1
}
+define %v4f16 @test_v4f16.round(%v4f16 %a) {
+ ; CHECK-LABEL: test_v4f16.round:
+ ; CHECK-NOFP16-COUNT-4: frinta s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frinta.4h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v4f16 @llvm.round.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
declare %v4f16 @llvm.powi.v4f16(%v4f16, i32) #0
@@ -218,6 +227,7 @@ declare %v4f16 @llvm.ceil.v4f16(%v4f16)
declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
+declare %v4f16 @llvm.round.v4f16(%v4f16) #0
;;;
@@ -409,6 +419,15 @@ define %v8f16 @test_v8f16.nearbyint(%v8f
%1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a)
ret %v8f16 %1
}
+define %v8f16 @test_v8f16.round(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.round:
+ ; CHECK-NOFP16-COUNT-8: frinta s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frinta.8h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v8f16 @llvm.round.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
declare %v8f16 @llvm.powi.v8f16(%v8f16, i32) #0
@@ -427,6 +446,7 @@ declare %v8f16 @llvm.ceil.v8f16(%v8f16)
declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
+declare %v8f16 @llvm.round.v8f16(%v8f16) #0
;;; Float vectors
Modified: llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll?rev=355545&r1=355544&r2=355545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll Wed Mar 6 12:30:06 2019
@@ -257,40 +257,44 @@ define void @test_insert_at_zero(half %a
define <4 x i8> @fptosi_i8(<4 x half> %a) #0 {
; CHECK-COMMON-LABEL: fptosi_i8:
-; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
-; CHECK-COMMON-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
-; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]]
-; CHECK-COMMON-NEXT: ret
+; CHECK-FP16: fcvtzs v0.4h, v0.4h
+; CHECK-CVT-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-CVT-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-CVT-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-COMMON-NEXT: ret
%1 = fptosi<4 x half> %a to <4 x i8>
ret <4 x i8> %1
}
define <4 x i16> @fptosi_i16(<4 x half> %a) #0 {
; CHECK-COMMON-LABEL: fptosi_i16:
-; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
-; CHECK-COMMON-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
-; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]]
-; CHECK-COMMON-NEXT: ret
+; CHECK-FP16: fcvtzs v0.4h, v0.4h
+; CHECK-CVT-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-CVT-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-CVT-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-COMMON-NEXT: ret
%1 = fptosi<4 x half> %a to <4 x i16>
ret <4 x i16> %1
}
define <4 x i8> @fptoui_i8(<4 x half> %a) #0 {
; CHECK-COMMON-LABEL: fptoui_i8:
-; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-FP16: fcvtzs v0.4h, v0.4h
+; CHECK-CVT-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
; NOTE: fcvtzs selected here because the xtn shaves the sign bit
-; CHECK-COMMON-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
-; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]]
-; CHECK-COMMON-NEXT: ret
+; CHECK-CVT-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-CVT-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-COMMON-NEXT: ret
%1 = fptoui<4 x half> %a to <4 x i8>
ret <4 x i8> %1
}
define <4 x i16> @fptoui_i16(<4 x half> %a) #0 {
; CHECK-COMMON-LABEL: fptoui_i16:
-; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
-; CHECK-COMMON-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]]
-; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-FP16: fcvtzu v0.4h, v0.4h
+; CHECK-CVT-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-CVT-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-CVT-NEXT: xtn v0.4h, [[REG2]]
; CHECK-COMMON-NEXT: ret
%1 = fptoui<4 x half> %a to <4 x i16>
ret <4 x i16> %1
Modified: llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll?rev=355545&r1=355544&r2=355545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll Wed Mar 6 12:30:06 2019
@@ -395,40 +395,45 @@ define void @test_insert_at_zero(half %a
define <8 x i8> @fptosi_i8(<8 x half> %a) #0 {
; CHECK-LABEL: fptosi_i8:
-; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
-; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
-; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
-; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
-; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
-; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
-; CHECK-NEXT: xtn v0.8b, [[I16]].8h
-; CHECK-NEXT: ret
+; CHECK-FP16-NEXT: fcvtzs [[LO:v[0-9]+\.8h]], v0.8h
+; CHECK-CVT-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-CVT-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-CVT-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-CVT-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-CVT-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-CVT-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-CVT-DAG: xtn v0.8b, [[I16]].8h
+; CHECK-FP16-NEXT: xtn v0.8b, [[LO]]
+; CHECK-NEXT: ret
%1 = fptosi<8 x half> %a to <8 x i8>
ret <8 x i8> %1
}
define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
; CHECK-LABEL: fptosi_i16:
-; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
-; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
-; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
-; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
-; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
-; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
-; CHECK-NEXT: ret
+; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
+; CHECK-CVT_DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-CVT_DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-CVT_DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-CVT_DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-CVT_DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-CVT_DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-COMMON_NEXT: ret
%1 = fptosi<8 x half> %a to <8 x i16>
ret <8 x i16> %1
}
define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
; CHECK-LABEL: fptoui_i8:
-; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
-; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
-; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
-; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
-; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
-; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
-; CHECK-NEXT: xtn v0.8b, [[I16]].8h
+; CHECK-FP16-NEXT: fcvtzu [[LO:v[0-9]+\.8h]], v0.8h
+; CHECK-CVT-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-CVT-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-CVT-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-CVT-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-CVT-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-CVT-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-CVT-DAG: xtn v0.8b, [[I16]].8h
+; CHECK-FP16-NEXT: xtn v0.8b, [[LO]]
; CHECK-NEXT: ret
%1 = fptoui<8 x half> %a to <8 x i8>
ret <8 x i8> %1
@@ -436,13 +441,14 @@ define <8 x i8> @fptoui_i8(<8 x half> %a
define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
; CHECK-LABEL: fptoui_i16:
-; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
-; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
-; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
-; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
-; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
-; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
-; CHECK-NEXT: ret
+; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-CVT-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-CVT-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-CVT-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-CVT-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-CVT-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-CVT-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: ret
%1 = fptoui<8 x half> %a to <8 x i16>
ret <8 x i16> %1
}
More information about the llvm-commits
mailing list