[llvm] c4b41e8 - [LegalizeDAG][X86] Enable STRICT_FP_TO_SINT/UINT to be promoted

Tue Nov 19 16:15:40 PST 2019

Author: Craig Topper
Date: 2019-11-19T16:14:37-08:00
New Revision: c4b41e8d1d860e2439c7c0e16bd1d1af3fe2d023

URL: https://github.com/llvm/llvm-project/commit/c4b41e8d1d860e2439c7c0e16bd1d1af3fe2d023
DIFF: https://github.com/llvm/llvm-project/commit/c4b41e8d1d860e2439c7c0e16bd1d1af3fe2d023.diff

LOG: [LegalizeDAG][X86] Enable STRICT_FP_TO_SINT/UINT to be promoted

Differential Revision: https://reviews.llvm.org/D70220

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b5500e12463d..ec099823c473 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -176,8 +176,8 @@ class SelectionDAGLegalize {
                                const SDLoc &dl);
   SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
                                 const SDLoc &dl);
-  SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
-                                const SDLoc &dl);
+  void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
+                             SmallVectorImpl<SDValue> &Results);
 
   SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
   SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
@@ -2480,9 +2480,13 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
 /// we promote it.  At this point, we know that the result and operand types are
 /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
 /// operation that returns a larger result.
-SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
-                                                    bool isSigned,
-                                                    const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
+                                                 SmallVectorImpl<SDValue> &Results) {
+  bool IsStrict = N->isStrictFPOpcode();
+  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
+                  N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+  EVT DestVT = N->getValueType(0);
+  SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
   // First step, figure out the appropriate FP_TO*INT operation to use.
   EVT NewOutTy = DestVT;
 
@@ -2495,26 +2499,32 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
 
     // A larger signed type can hold all unsigned values of the requested type,
     // so using FP_TO_SINT is valid
-    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
-      OpToUse = ISD::FP_TO_SINT;
+    OpToUse = IsStrict ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT;
+    if (TLI.isOperationLegalOrCustom(OpToUse, NewOutTy))
       break;
-    }
 
     // However, if the value may be < 0.0, we *must* use some FP_TO_SINT.
-    if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
-      OpToUse = ISD::FP_TO_UINT;
+    OpToUse = IsStrict ? ISD::STRICT_FP_TO_UINT : ISD::FP_TO_UINT;
+    if (!IsSigned && TLI.isOperationLegalOrCustom(OpToUse, NewOutTy))
       break;
-    }
 
     // Otherwise, try a larger type.
   }
 
   // Okay, we found the operation and type to use.
-  SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+  SDValue Operation;
+  if (IsStrict) {
+    SDVTList VTs = DAG.getVTList(NewOutTy, MVT::Other);
+    Operation = DAG.getNode(OpToUse, dl, VTs, N->getOperand(0), LegalOp);
+  } else
+    Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
 
   // Truncate the result of the extended FP_TO_*INT operation to the desired
   // size.
-  return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+  Results.push_back(Trunc);
+  if (IsStrict)
+    Results.push_back(Operation.getValue(1));
 }
 
 /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
@@ -4181,10 +4191,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     break;
   }
   case ISD::FP_TO_UINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::FP_TO_SINT:
-    Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
-                                 Node->getOpcode() == ISD::FP_TO_SINT, dl);
-    Results.push_back(Tmp1);
+  case ISD::STRICT_FP_TO_SINT:
+    PromoteLegalFP_TO_INT(Node, dl, Results);
     break;
   case ISD::UINT_TO_FP:
   case ISD::SINT_TO_FP:

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ef3a02d159dd..d4aaa7a9ee6b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -249,8 +249,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
     // this operation.
-    setOperationAction(ISD::FP_TO_SINT,         MVT::i8,  Promote);
-    // FIXME: setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
+    setOperationAction(ISD::FP_TO_SINT,        MVT::i8,  Promote);
+    // FIXME: This doesn't generate invalid exception when it should. PR44019.
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8,  Promote);
     setOperationAction(ISD::FP_TO_SINT,        MVT::i16, Custom);
     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
     setOperationAction(ISD::FP_TO_SINT,        MVT::i32, Custom);
@@ -263,9 +264,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // Handle FP_TO_UINT by promoting the destination to a larger signed
     // conversion.
     setOperationAction(ISD::FP_TO_UINT,        MVT::i8,  Promote);
-    // FIXME: setOperationAction(ISD::STRICT_FP_TO_UINT,  MVT::i8,  Promote);
+    // FIXME: This doesn't generate invalid exception when it should. PR44019.
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8,  Promote);
     setOperationAction(ISD::FP_TO_UINT,        MVT::i16, Promote);
-    // FIXME: setOperationAction(ISD::STRICT_FP_TO_UINT,  MVT::i16, Promote);
+    // FIXME: This doesn't generate invalid exception when it should. PR44019.
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
     setOperationAction(ISD::FP_TO_UINT,        MVT::i32, Custom);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
     setOperationAction(ISD::FP_TO_UINT,        MVT::i64, Custom);

diff  --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index a321be50427c..58041c29ab64 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -893,6 +893,52 @@ entry:
   ret double %rem
 }
 
+; Verify that fptosi(%x) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+; FIXME: The SSE/AVX code does not raise an invalid exception for all values
+; that don't fit in i8.
+define i8 @f20s8(double %x) #0 {
+; X87-LABEL: f20s8:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    subl $8, %esp
+; X87-NEXT:    .cfi_def_cfa_offset 12
+; X87-NEXT:    fldl {{[0-9]+}}(%esp)
+; X87-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    orl $3072, %eax # imm = 0xC00
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X87-NEXT:    fistps {{[0-9]+}}(%esp)
+; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X87-NEXT:    addl $8, %esp
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: f20s8:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    cvttsd2si {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    # kill: def $al killed $al killed $eax
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: f20s8:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvttsd2si %xmm0, %eax
+; SSE-NEXT:    # kill: def $al killed $al killed $eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: f20s8:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si %xmm0, %eax
+; AVX-NEXT:    # kill: def $al killed $al killed $eax
+; AVX-NEXT:    retq
+entry:
+  %result = call i8 @llvm.experimental.constrained.fptosi.i8.f64(double %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i8 %result
+}
+
 ; Verify that fptosi(%x) isn't simplified when the rounding mode is
 ; unknown.
 ; Verify that no gross errors happen.
@@ -1038,6 +1084,98 @@ entry:
   ret i64 %result
 }
 
+; Verify that fptoui(%x) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+; FIXME: The SSE/AVX code does not raise an invalid exception for all values
+; that don't fit in i8.
+define i8 @f20u8(double %x) #0 {
+; X87-LABEL: f20u8:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    subl $8, %esp
+; X87-NEXT:    .cfi_def_cfa_offset 12
+; X87-NEXT:    fldl {{[0-9]+}}(%esp)
+; X87-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    orl $3072, %eax # imm = 0xC00
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X87-NEXT:    fistps {{[0-9]+}}(%esp)
+; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X87-NEXT:    addl $8, %esp
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: f20u8:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    cvttsd2si {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    # kill: def $al killed $al killed $eax
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: f20u8:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvttsd2si %xmm0, %eax
+; SSE-NEXT:    # kill: def $al killed $al killed $eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: f20u8:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si %xmm0, %eax
+; AVX-NEXT:    # kill: def $al killed $al killed $eax
+; AVX-NEXT:    retq
+entry:
+  %result = call i8 @llvm.experimental.constrained.fptoui.i8.f64(double %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i8 %result
+}
+; Verify that fptoui(%x) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+; FIXME: The SSE/AVX code does not raise an invalid exception for all values
+; that don't fit in i16.
+define i16 @f20u16(double %x) #0 {
+; X87-LABEL: f20u16:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    subl $8, %esp
+; X87-NEXT:    .cfi_def_cfa_offset 12
+; X87-NEXT:    fldl {{[0-9]+}}(%esp)
+; X87-NEXT:    fnstcw (%esp)
+; X87-NEXT:    movzwl (%esp), %eax
+; X87-NEXT:    orl $3072, %eax # imm = 0xC00
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X87-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X87-NEXT:    fldcw (%esp)
+; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    # kill: def $ax killed $ax killed $eax
+; X87-NEXT:    addl $8, %esp
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: f20u16:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    cvttsd2si {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: f20u16:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvttsd2si %xmm0, %eax
+; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: f20u16:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si %xmm0, %eax
+; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX-NEXT:    retq
+entry:
+  %result = call i16 @llvm.experimental.constrained.fptoui.i16.f64(double %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i16 %result
+}
+
 ; Verify that fptoui(%x) isn't simplified when the rounding mode is
 ; unknown.
 ; Verify that no gross errors happen.
@@ -1681,9 +1819,12 @@ declare double @llvm.experimental.constrained.log10.f64(double, metadata, metada
 declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare i8  @llvm.experimental.constrained.fptosi.i8.f64(double, metadata)
 declare i16 @llvm.experimental.constrained.fptosi.i16.f64(double, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata)
+declare i8  @llvm.experimental.constrained.fptoui.i8.f64(double, metadata)
+declare i16 @llvm.experimental.constrained.fptoui.i16.f64(double, metadata)
 declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata)
 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)