[llvm] 90ff34e - [X86] Add custom widening for v2i32->v2f64 strict_uint_to_fp with AVX512F, but not AVX512VL.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 26 13:41:23 PST 2019
Author: Craig Topper
Date: 2019-12-26T13:40:56-08:00
New Revision: 90ff34e6ab38a333b16890c0d4b7ca891d213652
URL: https://github.com/llvm/llvm-project/commit/90ff34e6ab38a333b16890c0d4b7ca891d213652
DIFF: https://github.com/llvm/llvm-project/commit/90ff34e6ab38a333b16890c0d4b7ca891d213652.diff
LOG: [X86] Add custom widening for v2i32->v2f64 strict_uint_to_fp with AVX512F, but not AVX512VL.
Previously we widened with isel patterns, but that wasn't
exception-safe for strict FP. We now pad the input with zeros and
widen to v4i32->v4f64 during type legalization, and then let op
legalization widen further to v8i32->v8f64.
The vec_int_to_fp.ll changes occur because, without AVX512VL, we
no longer narrow extracts of strict_uint_to_fp to the v4i32->v2f64
instruction only to have isel re-widen it. We now keep the
operation wide throughout, so there is no longer an opportunity
to narrow the load.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
llvm/test/CodeGen/X86/vec_int_to_fp.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ac001f1e9f03..54e0a02be35b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18870,17 +18870,35 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
SDValue N0 = Op.getOperand(IsStrict ? 1 : 0);
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
- // Legalize to v4i32 type.
- N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
- DAG.getUNDEF(MVT::v2i32));
-
if (Subtarget.hasAVX512()) {
+ if (!Subtarget.hasVLX()) {
+ // Let generic type legalization widen this.
+ if (!IsStrict)
+ return SDValue();
+ // Otherwise pad the integer input with 0s and widen the operation.
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
+ DAG.getConstant(0, DL, MVT::v2i32));
+ SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other},
+ {Op.getOperand(0), N0});
+ SDValue Chain = Res.getValue(1);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2f64, Res,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getMergeValues({Res, Chain}, DL);
+ }
+
+ // Legalize to v4i32 type.
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
+ DAG.getUNDEF(MVT::v2i32));
if (IsStrict)
return DAG.getNode(X86ISD::STRICT_CVTUI2P, DL, {MVT::v2f64, MVT::Other},
{Op.getOperand(0), N0});
return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
}
+ // Legalize to v4i32 type.
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
+ DAG.getUNDEF(MVT::v2i32));
+
// Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT,
// but using v2i32 to v2f64 with X86ISD::CVTSI2P.
SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32);
@@ -45303,7 +45321,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0));
}
// v2f64 CVTUDQ2PD(v4i32).
- if (InOpcode == ISD::UINT_TO_FP &&
+ if (InOpcode == ISD::UINT_TO_FP && Subtarget.hasVLX() &&
InVec.getOperand(0).getValueType() == MVT::v4i32) {
return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0));
}
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c3da6d2a7c57..a446afa9c9bd 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8397,11 +8397,6 @@ def : Pat<(v4f64 (any_uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
-
-def : Pat<(v2f64 (X86any_VUintToFP (v4i32 VR128X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
- (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
let Predicates = [HasVLX] in {
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
index c68c61231715..9f1a9241581a 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
@@ -809,10 +809,9 @@ define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; AVX1-64-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: uitofp_v2i32_v2f64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
@@ -823,10 +822,9 @@ define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v2i32_v2f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 1d0106b75a84..b24155233c4d 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -3593,7 +3593,7 @@ define <2 x double> @uitofp_load_4i32_to_2f64_2(<4 x i32>* %x) {
;
; AVX512F-LABEL: uitofp_load_4i32_to_2f64_2:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512F-NEXT: vmovaps (%rdi), %xmm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
@@ -3606,7 +3606,7 @@ define <2 x double> @uitofp_load_4i32_to_2f64_2(<4 x i32>* %x) {
;
; AVX512DQ-LABEL: uitofp_load_4i32_to_2f64_2:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 3c3e3ed6127d..75f16029e659 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -6852,7 +6852,7 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
;
; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
More information about the llvm-commits
mailing list