[llvm] c06e531 - [X86] Use 128-bit vector instructions for f32/f64->i64 conversions on 32-bit targets with avx512dq and avx512vl instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 24 11:20:44 PST 2019
Author: Craig Topper
Date: 2019-12-24T11:20:10-08:00
New Revision: c06e53119b1f04696fbcf710aaa0818cbfc99600
URL: https://github.com/llvm/llvm-project/commit/c06e53119b1f04696fbcf710aaa0818cbfc99600
DIFF: https://github.com/llvm/llvm-project/commit/c06e53119b1f04696fbcf710aaa0818cbfc99600.diff
LOG: [X86] Use 128-bit vector instructions for f32/f64->i64 conversions on 32-bit targets with avx512dq and avx512vl instructions.
On 32-bit targets we can't use the scalar instruction, so we
insert the scalar into a vector and use packed conversions.
Previously we used either v4f32->v4i64 or v4f64->v4i64 to avoid
some complexity creating target-specific ISD opcodes for
v4f32->v2i64. But this causes extra vzeroupper instructions and
possibly frequency throttling on Intel CPUs.
This patch changes this to create a 128-bit vector and use a
target-specific ISD opcode if needed.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c6f834cc46c6..2e7d3d8df733 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28690,12 +28690,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (Subtarget.hasDQI() && VT == MVT::i64 &&
(SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
assert(!Subtarget.is64Bit() && "i64 should be legal");
- unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
- // Using a 256-bit input here to guarantee 128-bit input for f32 case.
- // TODO: Use 128-bit vectors for f64 case?
- // TODO: Use 128-bit vectors for f32 by using CVTTP2SI/CVTTP2UI.
+ unsigned NumElts = Subtarget.hasVLX() ? 2 : 8;
+ // If we use a 128-bit result we might need to use a target specific node.
+ unsigned SrcElts =
+ std::max(NumElts, 128U / (unsigned)SrcVT.getSizeInBits());
MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts);
- MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts);
+ MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), SrcElts);
+ unsigned Opc = N->getOpcode();
+ if (NumElts != SrcElts) {
+ if (IsStrict)
+ Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+ else
+ Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ }
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
@@ -28704,10 +28711,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Chain;
if (IsStrict) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
- Res = DAG.getNode(N->getOpcode(), SDLoc(N), Tys, N->getOperand(0), Res);
+ Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res);
Chain = Res.getValue(1);
} else
- Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res);
+ Res = DAG.getNode(Opc, SDLoc(N), VecVT, Res);
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx);
Results.push_back(Res);
if (IsStrict)
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
index 6164ebe672aa..30e4996057aa 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -42,19 +42,17 @@ define i64 @f_to_u64(float %a) nounwind {
; AVX512DQVL_32_WIN-LABEL: f_to_u64:
; AVX512DQVL_32_WIN: # %bb.0:
; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %xmm0
; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_WIN-NEXT: vzeroupper
; AVX512DQVL_32_WIN-NEXT: retl
;
; AVX512DQVL_32_LIN-LABEL: f_to_u64:
; AVX512DQVL_32_LIN: # %bb.0:
; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %xmm0
; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_LIN-NEXT: vzeroupper
; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: f_to_u64:
@@ -337,19 +335,17 @@ define i64 @f_to_s64(float %a) nounwind {
; AVX512DQVL_32_WIN-LABEL: f_to_s64:
; AVX512DQVL_32_WIN: # %bb.0:
; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %xmm0
; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_WIN-NEXT: vzeroupper
; AVX512DQVL_32_WIN-NEXT: retl
;
; AVX512DQVL_32_LIN-LABEL: f_to_s64:
; AVX512DQVL_32_LIN: # %bb.0:
; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %xmm0
; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_LIN-NEXT: vzeroupper
; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: f_to_s64:
@@ -524,19 +520,17 @@ define i64 @d_to_u64(double %a) nounwind {
; AVX512DQVL_32_WIN-LABEL: d_to_u64:
; AVX512DQVL_32_WIN: # %bb.0:
; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %xmm0, %xmm0
; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_WIN-NEXT: vzeroupper
; AVX512DQVL_32_WIN-NEXT: retl
;
; AVX512DQVL_32_LIN-LABEL: d_to_u64:
; AVX512DQVL_32_LIN: # %bb.0:
; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %xmm0, %xmm0
; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_LIN-NEXT: vzeroupper
; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: d_to_u64:
@@ -819,19 +813,17 @@ define i64 @d_to_s64(double %a) nounwind {
; AVX512DQVL_32_WIN-LABEL: d_to_s64:
; AVX512DQVL_32_WIN: # %bb.0:
; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %ymm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %xmm0, %xmm0
; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_WIN-NEXT: vzeroupper
; AVX512DQVL_32_WIN-NEXT: retl
;
; AVX512DQVL_32_LIN-LABEL: d_to_s64:
; AVX512DQVL_32_LIN: # %bb.0:
; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %ymm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %xmm0, %xmm0
; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_LIN-NEXT: vzeroupper
; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: d_to_s64:
More information about the llvm-commits
mailing list