[llvm] r319259 - [X86] Mark ISD::FP_TO_UINT v16i8/v16i16 as Promote under AVX512 instead of legal. Fix infinite loop in op legalization when promotion requires 2 steps.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 28 15:56:02 PST 2017


Author: ctopper
Date: Tue Nov 28 15:56:02 2017
New Revision: 319259

URL: http://llvm.org/viewvc/llvm-project?rev=319259&view=rev
Log:
[X86] Mark ISD::FP_TO_UINT v16i8/v16i16 as Promote under AVX512 instead of legal. Fix infinite loop in op legalization when promotion requires 2 steps.

Previously we had an isel pattern to add the truncate. Instead use Promote to add the truncate to the DAG before isel.

The Promote legalization code had to be updated to prevent an infinite loop if promotion took multiple steps because it wasn't remembering the previously tried value.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Tue Nov 28 15:56:02 2017
@@ -497,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_IN
          "Can't promote a vector with multiple results!");
   EVT VT = Op.getValueType();
 
-  EVT NewVT;
+  EVT NewVT = VT;
   unsigned NewOpc;
   while (true) {
-    NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+    NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
     assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
     if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
       NewOpc = ISD::FP_TO_SINT;

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 28 15:56:02 2017
@@ -1174,8 +1174,8 @@ X86TargetLowering::X86TargetLowering(con
 
     setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
     setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
-    setOperationAction(ISD::FP_TO_UINT,         MVT::v16i8, Legal);
-    setOperationAction(ISD::FP_TO_UINT,         MVT::v16i16, Legal);
+    setOperationAction(ISD::FP_TO_UINT,         MVT::v16i8, Promote);
+    setOperationAction(ISD::FP_TO_UINT,         MVT::v16i16, Promote);
     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
     setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
     setOperationAction(ISD::FP_TO_UINT,         MVT::v2i32, Custom);

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Nov 28 15:56:02 2017
@@ -7890,11 +7890,6 @@ defm VPMOVSWB   : avx512_trunc_wb<0x20,
 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus,
                                   truncstore_us_vi8, masked_truncstore_us_vi8>;
 
-def : Pat<(v16i16 (fp_to_uint (v16f32 VR512:$src1))),
-          (VPMOVDWZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
-def : Pat<(v16i8 (fp_to_uint (v16f32 VR512:$src1))),
-          (VPMOVDBZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
-
 let Predicates = [HasAVX512, NoVLX] in {
 def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
          (v8i16 (EXTRACT_SUBREG

Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Tue Nov 28 15:56:02 2017
@@ -442,7 +442,7 @@ define <16 x i32> @f32to16ui(<16 x float
 define <16 x i8> @f32to16uc(<16 x float> %f) {
 ; ALL-LABEL: f32to16uc:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vcvttps2udq %zmm0, %zmm0
+; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
 ; ALL-NEXT:    vpmovdb %zmm0, %xmm0
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
@@ -453,7 +453,7 @@ define <16 x i8> @f32to16uc(<16 x float>
 define <16 x i16> @f32to16us(<16 x float> %f) {
 ; ALL-LABEL: f32to16us:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vcvttps2udq %zmm0, %zmm0
+; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
 ; ALL-NEXT:    vpmovdw %zmm0, %ymm0
 ; ALL-NEXT:    retq
   %res = fptoui <16 x float> %f to <16 x i16>

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319259&r1=319258&r2=319259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Tue Nov 28 15:56:02 2017
@@ -1578,14 +1578,14 @@ define <16 x i32> @f32to16ui(<16 x float
 define <16 x i8> @f32to16uc(<16 x float> %f) {
 ; GENERIC-LABEL: f32to16uc:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vcvttps2udq %zmm0, %zmm0
+; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0
 ; GENERIC-NEXT:    vpmovdb %zmm0, %xmm0
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f32to16uc:
 ; SKX:       # BB#0:
-; SKX-NEXT:    vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
@@ -1596,13 +1596,13 @@ define <16 x i8> @f32to16uc(<16 x float>
 define <16 x i16> @f32to16us(<16 x float> %f) {
 ; GENERIC-LABEL: f32to16us:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vcvttps2udq %zmm0, %zmm0
+; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0
 ; GENERIC-NEXT:    vpmovdw %zmm0, %ymm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f32to16us:
 ; SKX:       # BB#0:
-; SKX-NEXT:    vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %res = fptoui <16 x float> %f to <16 x i16>




More information about the llvm-commits mailing list