[llvm] r363417 - [FPEnv] Lower STRICT_FP_EXTEND and STRICT_FP_ROUND nodes in preprocess phase of ISelLowering to mirror non-strict nodes on x86.

Fri Jun 14 09:28:55 PDT 2019

Author: kpn
Date: Fri Jun 14 09:28:55 2019
New Revision: 363417

URL: http://llvm.org/viewvc/llvm-project?rev=363417&view=rev
Log:
[FPEnv] Lower STRICT_FP_EXTEND and STRICT_FP_ROUND nodes in preprocess phase of ISelLowering to mirror non-strict nodes on x86.

I recently discovered a bug on the x86 platform: The fp80 type was not handled well by x86 for constrained floating point nodes, as their regular counterparts are replaced by extending loads and truncating stores during the preprocess phase. Normally, platforms don't have this issue, as they don't typically attempt to perform such legalizations during instruction selection preprocessing. Before this change, strict_fp nodes survived until they were mutated to normal nodes, which happened shortly after preprocessing on other platforms. This modification lowers these nodes at the same phase while properly utilizing the chain.5

Submitted by:	Drew Wock <drew.wock at sas.com>
Reviewed by:	Craig Topper, Kevin P. Neal
Approved by:	Craig Topper
Differential Revision:	https://reviews.llvm.org/D63271

Added:
    llvm/trunk/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=363417&r1=363416&r2=363417&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Jun 14 09:28:55 2019
@@ -931,59 +931,124 @@ void X86DAGToDAGISel::PreprocessISelDAG(
     // and the node legalization.  As such this pass basically does "really
     // late" legalization of these inline with the X86 isel pass.
     // FIXME: This should only happen when not compiled with -O0.
-    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
-      continue;
+    switch (N->getOpcode()) {
+    default: continue;
+    case ISD::FP_ROUND:
+    case ISD::FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(0).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
 
-    MVT SrcVT = N->getOperand(0).getSimpleValueType();
-    MVT DstVT = N->getSimpleValueType(0);
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
+        continue;
 
-    // If any of the sources are vectors, no fp stack involved.
-    if (SrcVT.isVector() || DstVT.isVector())
-      continue;
-
-    // If the source and destination are SSE registers, then this is a legal
-    // conversion that should not be lowered.
-    const X86TargetLowering *X86Lowering =
-        static_cast<const X86TargetLowering *>(TLI);
-    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
-    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
-    if (SrcIsSSE && DstIsSSE)
-      continue;
-
-    if (!SrcIsSSE && !DstIsSSE) {
-      // If this is an FPStack extension, it is a noop.
-      if (N->getOpcode() == ISD::FP_EXTEND)
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
         continue;
-      // If this is a value-preserving FPStack truncation, it is a noop.
-      if (N->getConstantOperandVal(1))
+
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(1))
+          continue;
+      }
+
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore.  SSE can fold direct loads into other
+      // operations.  Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
+
+      // FIXME: optimize the case where the src/dest is a load or store?
+
+      SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
+                                          MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
+
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created.  This will cause general havok on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+      break;
+    }
+
+    //The sequence of events for lowering STRICT_FP versions of these nodes requires
+    //dealing with the chain differently, as there is already a preexisting chain.
+    case ISD::STRICT_FP_ROUND:
+    case ISD::STRICT_FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(1).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
+
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
         continue;
+
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
+        continue;
+
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(2))
+          continue;
+      }
+
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore.  SSE can fold direct loads into other
+      // operations.  Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::STRICT_FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
+
+      // FIXME: optimize the case where the src/dest is a load or store?
+
+      //Since the operation is StrictFP, use the preexisting chain.
+      SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
+                                MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
+
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created.  This will cause general havok on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesWith(N, Result.getNode());
+      break;
+    }
     }
 
-    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
-    // FPStack has extload and truncstore.  SSE can fold direct loads into other
-    // operations.  Based on this, decide what we want to do.
-    MVT MemVT;
-    if (N->getOpcode() == ISD::FP_ROUND)
-      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
-    else
-      MemVT = SrcIsSSE ? SrcVT : DstVT;
-
-    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
-    SDLoc dl(N);
-
-    // FIXME: optimize the case where the src/dest is a load or store?
-    SDValue Store =
-        CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
-                              MemTmp, MachinePointerInfo(), MemVT);
-    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
-                                        MachinePointerInfo(), MemVT);
-
-    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
-    // extload we created.  This will cause general havok on the dag because
-    // anything below the conversion could be folded into other existing nodes.
-    // To avoid invalidating 'I', back it up to the convert node.
-    --I;
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
 
     // Now that we did that, the node is dead.  Increment the iterator to the
     // next node to process, then delete N.

Added: llvm/trunk/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/constrained-fp80-trunc-ext.ll?rev=363417&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/constrained-fp80-trunc-ext.ll (added)
+++ llvm/trunk/test/CodeGen/X86/constrained-fp80-trunc-ext.ll Fri Jun 14 09:28:55 2019
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=x86_64-gnu-linux < %s | FileCheck %s
+
+define x86_fp80 @constrained_fpext_f32_as_fp80(float %mem) {
+; CHECK-LABEL: constrained_fpext_f32_as_fp80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    flds -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(
+            float %mem,
+            metadata !"fpexcept.strict")
+  ret x86_fp80 %ext
+}
+
+define float @constrained_fptrunc_f80_to_f32(x86_fp80 %reg) {
+; CHECK-LABEL: constrained_fptrunc_f80_to_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %trunc = call float @llvm.experimental.constrained.fptrunc.f32.f80(
+             x86_fp80 %reg,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict")
+  ret float %trunc
+}
+
+define x86_fp80 @constrained_fpext_f64_to_f80(double %mem) {
+; CHECK-LABEL: constrained_fpext_f64_to_f80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(
+            double %mem,
+            metadata !"fpexcept.strict")
+  ret x86_fp80 %ext
+}
+
+define double @constrained_fptrunc_f80_to_f64(x86_fp80 %reg) {
+; CHECK-LABEL: constrained_fptrunc_f80_to_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    retq
+  %trunc = call double @llvm.experimental.constrained.fptrunc.f64.f80(
+             x86_fp80 %reg,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict")
+  ret double %trunc
+}
+
+declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(double, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f80(x86_fp80, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.f80(x86_fp80, metadata, metadata)