[llvm] r355134 - [AArch64] Improve FP16 vector convert from short instructions.
Abderrazek Zaafrani via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 28 12:21:47 PST 2019
Author: az
Date: Thu Feb 28 12:21:46 2019
New Revision: 355134
URL: http://llvm.org/viewvc/llvm-project?rev=355134&view=rev
Log:
[AArch64] Improve FP16 vector convert from short instructions.
https://reviews.llvm.org/D58563
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll
llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=355134&r1=355133&r2=355134&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Feb 28 12:21:46 2019
@@ -660,14 +660,9 @@ AArch64TargetLowering::AArch64TargetLowe
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
- setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
- setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
- // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
- // -> v8f16 conversions.
+ // i8 vector elements also need promotion to i32 for v8i8 -> v8f16.
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
- setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
- setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
@@ -678,6 +673,20 @@ AArch64TargetLowering::AArch64TargetLowe
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
+ if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
+ } else {
+ // When the subtarget lacks fullfp16 support, promote the i16 input
+ // elements to i32 first.
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
+ }
+
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
Modified: llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll?rev=355134&r1=355133&r2=355134&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll Thu Feb 28 12:21:46 2019
@@ -156,21 +156,22 @@ define <4 x half> @sitofp_i8(<4 x i8> %a
; CHECK-COMMON-LABEL: sitofp_i8:
; CHECK-COMMON-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8
; CHECK-COMMON-NEXT: sshr [[OP2:v[0-9]+\.4h]], [[OP1]], #8
-; CHECK-COMMON-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0
-; CHECK-COMMON-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]]
-; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP4]]
+; CHECK-FP16-NEXT: scvtf v0.4h, [[OP2]]
+; CHECK-CVT-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0
+; CHECK-CVT-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]]
+; CHECK-CVT-NEXT: fcvtn v0.4h, [[OP4]]
; CHECK-COMMON-NEXT: ret
%1 = sitofp <4 x i8> %a to <4 x half>
ret <4 x half> %1
}
-
define <4 x half> @sitofp_i16(<4 x i16> %a) #0 {
; CHECK-COMMON-LABEL: sitofp_i16:
-; CHECK-COMMON-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0
-; CHECK-COMMON-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
-; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]]
-; CHECK-COMMON-NEXT: ret
+; CHECK-FP16-NEXT: scvtf v0.4h, v0.4h
+; CHECK-CVT-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-CVT-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-CVT-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-COMMON-NEXT: ret
%1 = sitofp <4 x i16> %a to <4 x half>
ret <4 x half> %1
}
@@ -201,9 +202,10 @@ define <4 x half> @sitofp_i64(<4 x i64>
define <4 x half> @uitofp_i8(<4 x i8> %a) #0 {
; CHECK-COMMON-LABEL: uitofp_i8:
; CHECK-COMMON-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-COMMON-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
-; CHECK-COMMON-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
-; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h
+; CHECK-CVT-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-CVT-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-CVT-NEXT: fcvtn v0.4h, [[OP2]]
; CHECK-COMMON-NEXT: ret
%1 = uitofp <4 x i8> %a to <4 x half>
ret <4 x half> %1
@@ -212,9 +214,10 @@ define <4 x half> @uitofp_i8(<4 x i8> %a
define <4 x half> @uitofp_i16(<4 x i16> %a) #0 {
; CHECK-COMMON-LABEL: uitofp_i16:
-; CHECK-COMMON-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
-; CHECK-COMMON-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
-; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h
+; CHECK-CVT-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-CVT-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-CVT-NEXT: fcvtn v0.4h, [[OP2]]
; CHECK-COMMON-NEXT: ret
%1 = uitofp <4 x i16> %a to <4 x half>
ret <4 x half> %1
Modified: llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll?rev=355134&r1=355133&r2=355134&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll Thu Feb 28 12:21:46 2019
@@ -295,13 +295,14 @@ define <8 x half> @sitofp_i8(<8 x i8> %a
define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
; CHECK-LABEL: sitofp_i16:
-; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
-; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v0.4h, #0
-; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
-; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
-; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
-; CHECK-DAG: fcvtn v0.4h, [[HIF]]
-; CHECK: mov v0.d[1], v[[LOREG]].d[0]
+; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
+; CHECK-CVT-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
+; CHECK-CVT-NEXT: sshll [[HI:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-CVT-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-CVT-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-CVT-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-CVT-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK-CVT-NEXT: mov v0.d[1], v[[LOREG]].d[0]
%1 = sitofp <8 x i16> %a to <8 x half>
ret <8 x half> %1
}
@@ -347,13 +348,14 @@ define <8 x half> @uitofp_i8(<8 x i8> %a
define <8 x half> @uitofp_i16(<8 x i16> %a) #0 {
; CHECK-LABEL: uitofp_i16:
-; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
-; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v0.4h, #0
-; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
-; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
-; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
-; CHECK-DAG: fcvtn v0.4h, [[HIF]]
-; CHECK: mov v0.d[1], v[[LOREG]].d[0]
+; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
+; CHECK-CVT-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
+; CHECK-CVT-NEXT: ushll [[HI:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-CVT-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-CVT-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-CVT-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-CVT-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK-CVT-NEXT: mov v0.d[1], v[[LOREG]].d[0]
%1 = uitofp <8 x i16> %a to <8 x half>
ret <8 x half> %1
}
More information about the llvm-commits mailing list