[llvm] r311646 - [AArch64] Custom lowering of copysign f16
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 24 02:21:10 PDT 2017
Author: sjoerdmeijer
Date: Thu Aug 24 02:21:10 2017
New Revision: 311646
URL: http://llvm.org/viewvc/llvm-project?rev=311646&view=rev
Log:
[AArch64] Custom lowering of copysign f16
This is a follow up patch of r311154 and introduces custom lowering of copysign
f16 to avoid promotions to single precision types when the subtarget supports
fullfp16.
Differential Revision: https://reviews.llvm.org/D36893
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/f16-instructions.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=311646&r1=311645&r2=311646&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Aug 24 02:21:10 2017
@@ -321,6 +321,10 @@ AArch64TargetLowering::AArch64TargetLowe
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ if (Subtarget->hasFullFP16())
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
+ else
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
@@ -333,7 +337,6 @@ AArch64TargetLowering::AArch64TargetLowe
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
@@ -4084,25 +4087,26 @@ SDValue AArch64TargetLowering::LowerFCOP
In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
EVT VecVT;
- EVT EltVT;
uint64_t EltMask;
SDValue VecVal1, VecVal2;
- if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
- EltVT = MVT::i32;
- VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
- EltMask = 0x80000000ULL;
+ auto setVecVal = [&] (int Idx) {
if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
+ VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
+ VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In2);
} else {
VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
}
+ };
+
+ if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
+ VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
+ EltMask = 0x80000000ULL;
+ setVecVal(AArch64::ssub);
} else if (VT == MVT::f64 || VT == MVT::v2f64) {
- EltVT = MVT::i64;
VecVT = MVT::v2i64;
// We want to materialize a mask with the high bit set, but the AdvSIMD
@@ -4110,15 +4114,11 @@ SDValue AArch64TargetLowering::LowerFCOP
// 64-bit elements. Instead, materialize zero and then negate it.
EltMask = 0;
- if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
- } else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
- }
+ setVecVal(AArch64::dsub);
+ } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
+ VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
+ EltMask = 0x8000ULL;
+ setVecVal(AArch64::hsub);
} else {
llvm_unreachable("Invalid type for copysign!");
}
@@ -4136,6 +4136,8 @@ SDValue AArch64TargetLowering::LowerFCOP
SDValue Sel =
DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
+ if (VT == MVT::f16)
+ return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
if (VT == MVT::f32)
return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
else if (VT == MVT::f64)
Modified: llvm/trunk/test/CodeGen/AArch64/f16-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/f16-instructions.ll?rev=311646&r1=311645&r2=311646&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/f16-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/f16-instructions.ll Thu Aug 24 02:21:10 2017
@@ -934,37 +934,57 @@ define half @test_maxnum(half %a, half %
ret half %r
}
-; CHECK-COMMON-LABEL: test_copysign:
-; CHECK-COMMON-NEXT: fcvt s1, h1
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: movi.4s v2, #128, lsl #24
-; CHECK-COMMON-NEXT: bit.16b v0, v1, v2
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ret
+; CHECK-CVT-LABEL: test_copysign:
+; CHECK-CVT-NEXT: fcvt s1, h1
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24
+; CHECK-CVT-NEXT: bit.16b v0, v1, v2
+; CHECK-CVT-NEXT: fcvt h0, s0
+; CHECK-CVT-NEXT: ret
+
+; CHECK-FP16-LABEL: test_copysign:
+; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8
+; CHECK-FP16-NEXT: bit.16b v0, v1, v2
+; CHECK-FP16-NEXT: ret
+
define half @test_copysign(half %a, half %b) #0 {
%r = call half @llvm.copysign.f16(half %a, half %b)
ret half %r
}
-; CHECK-COMMON-LABEL: test_copysign_f32:
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: movi.4s v2, #128, lsl #24
-; CHECK-COMMON-NEXT: bit.16b v0, v1, v2
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ret
+; CHECK-CVT-LABEL: test_copysign_f32:
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24
+; CHECK-CVT-NEXT: bit.16b v0, v1, v2
+; CHECK-CVT-NEXT: fcvt h0, s0
+; CHECK-CVT-NEXT: ret
+
+; CHECK-FP16-LABEL: test_copysign_f32:
+; CHECK-FP16-NEXT: fcvt h1, s1
+; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8
+; CHECK-FP16-NEXT: bit.16b v0, v1, v2
+; CHECK-FP16-NEXT: ret
+
define half @test_copysign_f32(half %a, float %b) #0 {
%tb = fptrunc float %b to half
%r = call half @llvm.copysign.f16(half %a, half %tb)
ret half %r
}
-; CHECK-COMMON-LABEL: test_copysign_f64:
-; CHECK-COMMON-NEXT: fcvt s1, d1
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: movi.4s v2, #128, lsl #24
-; CHECK-COMMON-NEXT: bit.16b v0, v1, v2
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ret
+; CHECK-CVT-LABEL: test_copysign_f64:
+; CHECK-CVT-NEXT: fcvt s1, d1
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24
+; CHECK-CVT-NEXT: bit.16b v0, v1, v2
+; CHECK-CVT-NEXT: fcvt h0, s0
+; CHECK-CVT-NEXT: ret
+
+; CHECK-FP16-LABEL: test_copysign_f64:
+; CHECK-FP16-NEXT: fcvt h1, d1
+; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8
+; CHECK-FP16-NEXT: bit.16b v0, v1, v2
+; CHECK-FP16-NEXT: ret
+
define half @test_copysign_f64(half %a, double %b) #0 {
%tb = fptrunc double %b to half
%r = call half @llvm.copysign.f16(half %a, half %tb)
@@ -974,12 +994,19 @@ define half @test_copysign_f64(half %a,
; Check that the FP promotion will use a truncating FP_ROUND, so we can fold
; away the (fpext (fp_round <result>)) here.
-; CHECK-COMMON-LABEL: test_copysign_extended:
-; CHECK-COMMON-NEXT: fcvt s1, h1
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: movi.4s v2, #128, lsl #24
-; CHECK-COMMON-NEXT: bit.16b v0, v1, v2
-; CHECK-COMMON-NEXT: ret
+; CHECK-CVT-LABEL: test_copysign_extended:
+; CHECK-CVT-NEXT: fcvt s1, h1
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24
+; CHECK-CVT-NEXT: bit.16b v0, v1, v2
+; CHECK-CVT-NEXT: ret
+
+; CHECK-FP16-LABEL: test_copysign_extended:
+; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8
+; CHECK-FP16-NEXT: bit.16b v0, v1, v2
+; CHECK-FP16-NEXT: fcvt s0, h0
+; CHECK-FP16-NEXT: ret
+
define float @test_copysign_extended(half %a, half %b) #0 {
%r = call half @llvm.copysign.f16(half %a, half %b)
%xr = fpext half %r to float
More information about the llvm-commits
mailing list