[llvm] r319259 - [X86] Mark ISD::FP_TO_UINT v16i8/v16i16 as Promote under AVX512 instead of legal. Fix infinite loop in op legalization when promotion requires 2 steps.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 15:56:02 PST 2017
Author: ctopper
Date: Tue Nov 28 15:56:02 2017
New Revision: 319259
URL: http://llvm.org/viewvc/llvm-project?rev=319259&view=rev
Log:
[X86] Mark ISD::FP_TO_UINT v16i8/v16i16 as Promote under AVX512 instead of legal. Fix infinite loop in op legalization when promotion requires 2 steps.
Previously we had an isel pattern that added the truncate. Instead, use Promote so that the truncate is added to the DAG before isel.
The Promote legalization code had to be updated to prevent an infinite loop when promotion takes multiple steps: each loop iteration widened the original VT again instead of widening the previously tried NewVT, so it kept retrying the same type.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Tue Nov 28 15:56:02 2017
@@ -497,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_IN
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
- EVT NewVT;
+ EVT NewVT = VT;
unsigned NewOpc;
while (true) {
- NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
NewOpc = ISD::FP_TO_SINT;
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 28 15:56:02 2017
@@ -1174,8 +1174,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Nov 28 15:56:02 2017
@@ -7890,11 +7890,6 @@ defm VPMOVSWB : avx512_trunc_wb<0x20,
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
-def : Pat<(v16i16 (fp_to_uint (v16f32 VR512:$src1))),
- (VPMOVDWZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
-def : Pat<(v16i8 (fp_to_uint (v16f32 VR512:$src1))),
- (VPMOVDBZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
-
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Tue Nov 28 15:56:02 2017
@@ -442,7 +442,7 @@ define <16 x i32> @f32to16ui(<16 x float
define <16 x i8> @f32to16uc(<16 x float> %f) {
; ALL-LABEL: f32to16uc:
; ALL: # BB#0:
-; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: vpmovdb %zmm0, %xmm0
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
@@ -453,7 +453,7 @@ define <16 x i8> @f32to16uc(<16 x float>
define <16 x i16> @f32to16us(<16 x float> %f) {
; ALL-LABEL: f32to16us:
; ALL: # BB#0:
-; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: retq
%res = fptoui <16 x float> %f to <16 x i16>
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Tue Nov 28 15:56:02 2017
@@ -1578,14 +1578,14 @@ define <16 x i32> @f32to16ui(<16 x float
define <16 x i8> @f32to16uc(<16 x float> %f) {
; GENERIC-LABEL: f32to16uc:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0
+; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0
; GENERIC-NEXT: vpmovdb %zmm0, %xmm0
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to16uc:
; SKX: # BB#0:
-; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1596,13 +1596,13 @@ define <16 x i8> @f32to16uc(<16 x float>
define <16 x i16> @f32to16us(<16 x float> %f) {
; GENERIC-LABEL: f32to16us:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0
+; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0
; GENERIC-NEXT: vpmovdw %zmm0, %ymm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to16us:
; SKX: # BB#0:
-; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = fptoui <16 x float> %f to <16 x i16>
More information about the llvm-commits
mailing list