[llvm] 7f12efa - [X86][FP16] Lower half->i16 into vcvttph2[u]w directly
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 4 02:45:03 PST 2023
Author: Phoebe Wang
Date: 2023-02-04T18:23:56+08:00
New Revision: 7f12efa88e17548d98f3e7425687f4afe0df34ed
URL: https://github.com/llvm/llvm-project/commit/7f12efa88e17548d98f3e7425687f4afe0df34ed
DIFF: https://github.com/llvm/llvm-project/commit/7f12efa88e17548d98f3e7425687f4afe0df34ed.diff
LOG: [X86][FP16] Lower half->i16 into vcvttph2[u]w directly
Reviewed By: LuoYuanke, RKSimon
Differential Revision: https://reviews.llvm.org/D143170
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512-cvt.ll
llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e9228e45bc37d..a218871ae0908 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1680,16 +1680,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
- for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
+ for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
}
- setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom);
+
+ for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
+ }
+
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
@@ -22830,19 +22834,24 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
- if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
+ // v8f32/v16f32/v8f64->v8i16/v16i16 need to widen first.
+ if (VT.getVectorElementType() == MVT::i16) {
+ assert((SrcVT.getVectorElementType() == MVT::f32 ||
+ SrcVT.getVectorElementType() == MVT::f64) &&
+ "Expected f32/f64 vector!");
+ MVT NVT = VT.changeVectorElementType(MVT::i32);
if (IsStrict) {
Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
: ISD::STRICT_FP_TO_UINT,
- dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
+ dl, {NVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
- MVT::v8i32, Src);
+ NVT, Src);
}
// TODO: Need to add exception check code for strict FP.
- Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index bfd9b322bd457..93aba2efb0338 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -424,7 +424,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) {
define <16 x i16> @f32to16us(<16 x float> %f) {
; ALL-LABEL: f32to16us:
; ALL: # %bb.0:
-; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
+; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: retq
%res = fptoui <16 x float> %f to <16 x i16>
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
index 4c873eb3ac1a8..b1bedcf157eed 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
@@ -771,8 +771,7 @@ define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
define <16 x i16> @test_s16tof16(<16 x half> %a) {
; CHECK-LABEL: test_s16tof16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
-; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-NEXT: vcvttph2w %ymm0, %ymm0
; CHECK-NEXT: retq
%res = fptosi <16 x half> %a to <16 x i16>
ret <16 x i16> %res
@@ -781,8 +780,7 @@ define <16 x i16> @test_s16tof16(<16 x half> %a) {
define <16 x i16> @test_u16tof16(<16 x half> %a) {
; CHECK-LABEL: test_u16tof16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
-; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0
; CHECK-NEXT: retq
%res = fptoui <16 x half> %a to <16 x i16>
ret <16 x i16> %res
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
index bc0dd022bfae4..36d6f863b37a1 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
@@ -59,8 +59,7 @@ define <8 x i32> @strict_vector_fptoui_v8f16_to_v8i32(<8 x half> %a) #0 {
define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
-; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-NEXT: vcvttph2w %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half> %a,
metadata !"fpexcept.strict") #0
@@ -70,8 +69,7 @@ define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 {
define <16 x i16> @strict_vector_fptoui_v16f16_to_v16i16(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
-; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half> %a,
metadata !"fpexcept.strict") #0
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
index af52e5fa98b61..cd39206fb14b4 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -798,7 +798,7 @@ define <16 x i16> @strict_vector_fptosi_v16f32_to_v16i16(<16 x float> %a) #0 {
define <16 x i16> @strict_vector_fptoui_v16f32_to_v16i16(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0
+; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT: vpmovdw %zmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float> %a,
More information about the llvm-commits mailing list