[llvm] 6a6c527 - [X86][FP16] Combine two steps conversions into direct conversion
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 13 06:58:04 PST 2023
Author: Phoebe Wang
Date: 2023-02-13T22:57:56+08:00
New Revision: 6a6c527ee287a4a7787fb5c519014c2e22f718c3
URL: https://github.com/llvm/llvm-project/commit/6a6c527ee287a4a7787fb5c519014c2e22f718c3
DIFF: https://github.com/llvm/llvm-project/commit/6a6c527ee287a4a7787fb5c519014c2e22f718c3.diff
LOG: [X86][FP16] Combine two steps conversions into direct conversion
When neither v8i64 nor v4f16 is legal in a v8i64->v8f16 conversion, the legalizer breaks it into v8i64->v4i64->v4f32->v8f32->v8f16.
Given that we support a direct v4i64->v8f16 conversion, we can convert each v4i64 half directly and combine the two results with a shuffle instruction.
Reviewed By: LuoYuanke
Differential Revision: https://reviews.llvm.org/D143872
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512fp16-cvt.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 72c3bfd821c8b..24bd18df57e12 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -56680,9 +56680,6 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
- if (Subtarget.hasFP16())
- return SDValue();
-
bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
@@ -56692,11 +56689,47 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
SrcVT.getVectorElementType() != MVT::f32)
return SDValue();
+ SDLoc dl(N);
+
+ SDValue Cvt, Chain;
unsigned NumElts = VT.getVectorNumElements();
- if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ if (Subtarget.hasFP16()) {
+ // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64), ..)))
+ // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64), ..))
+ if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS) {
+ SDValue Cvt0, Cvt1;
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ bool IsOp0Strict = Op0->isStrictFPOpcode();
+ if (Op0.getOpcode() != Op1.getOpcode() ||
+ Op0.getOperand(IsOp0Strict ? 1 : 0).getValueType() != MVT::v4i64 ||
+ Op1.getOperand(IsOp0Strict ? 1 : 0).getValueType() != MVT::v4i64) {
+ return SDValue();
+ }
+ int Mask[8] = {0, 1, 2, 3, 8, 9, 10, 11};
+ if (IsStrict) {
+ assert(IsOp0Strict && "Op0 must be strict node");
+ unsigned Opc = Op0.getOpcode() == ISD::STRICT_SINT_TO_FP
+ ? X86ISD::STRICT_CVTSI2P
+ : X86ISD::STRICT_CVTUI2P;
+ Cvt0 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {Op0.getOperand(0), Op0.getOperand(1)});
+ Cvt1 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {Op1.getOperand(0), Op1.getOperand(1)});
+ Cvt = DAG.getVectorShuffle(MVT::v8f16, dl, Cvt0, Cvt1, Mask);
+ return DAG.getMergeValues({Cvt, Cvt0.getValue(1)}, dl);
+ }
+ unsigned Opc = Op0.getOpcode() == ISD::SINT_TO_FP ? X86ISD::CVTSI2P
+ : X86ISD::CVTUI2P;
+ Cvt0 = DAG.getNode(Opc, dl, MVT::v8f16, Op0.getOperand(0));
+ Cvt1 = DAG.getNode(Opc, dl, MVT::v8f16, Op1.getOperand(0));
+ return Cvt = DAG.getVectorShuffle(MVT::v8f16, dl, Cvt0, Cvt1, Mask);
+ }
return SDValue();
+ }
- SDLoc dl(N);
+ if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ return SDValue();
// Widen to at least 4 input elements.
if (NumElts < 4)
@@ -56704,9 +56737,8 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
DAG.getConstantFP(0.0, dl, SrcVT));
// Destination is v8i16 with at least 8 elements.
- EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
- std::max(8U, NumElts));
- SDValue Cvt, Chain;
+ EVT CvtVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i16, std::max(8U, NumElts));
SDValue Rnd = DAG.getTargetConstant(4, dl, MVT::i32);
if (IsStrict) {
Cvt = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {CvtVT, MVT::Other},
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
index 1aa7b1ce73fdc..c3a979f9840bd 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -1031,10 +1031,9 @@ define half @f128_to_half(fp128 %x) nounwind {
define <8 x half> @s64tof16(<8 x i64> %a) #0 {
; CHECK-LABEL: s64tof16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtqq2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vcvtqq2ph %ymm1, %xmm1
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%1 = sitofp <8 x i64> %a to <8 x half>
@@ -1044,10 +1043,9 @@ define <8 x half> @s64tof16(<8 x i64> %a) #0 {
define <8 x half> @u64tof16(<8 x i64> %a) #0 {
; CHECK-LABEL: u64tof16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtuqq2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vcvtuqq2ph %ymm1, %xmm1
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%1 = uitofp <8 x i64> %a to <8 x half>
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
index 7cdd214ca139b..cdb2d69e8b53f 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
@@ -146,10 +146,9 @@ define <4 x half> @uitofp_v4i64_v4f16(<4 x i64> %x) #0 {
define <8 x half> @sitofp_v8i64_v8f16(<8 x i64> %x) #1 {
; CHECK-LABEL: sitofp_v8i64_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtqq2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vcvtqq2ph %ymm1, %xmm1
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64> %x,
@@ -161,10 +160,9 @@ define <8 x half> @sitofp_v8i64_v8f16(<8 x i64> %x) #1 {
define <8 x half> @uitofp_v8i64_v8f16(<8 x i64> %x) #1 {
; CHECK-LABEL: uitofp_v8i64_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtuqq2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vcvtuqq2ph %ymm1, %xmm1
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64> %x,
More information about the llvm-commits
mailing list