[llvm] 6333679 - [FPEnv] Default NoFPExcept SDNodeFlag to false

Ulrich Weigand via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 2 08:00:38 PST 2020


Author: Ulrich Weigand
Date: 2020-01-02T16:59:45+01:00
New Revision: 63336795f0d50a009e8ec034d95811170efc978b

URL: https://github.com/llvm/llvm-project/commit/63336795f0d50a009e8ec034d95811170efc978b
DIFF: https://github.com/llvm/llvm-project/commit/63336795f0d50a009e8ec034d95811170efc978b.diff

LOG: [FPEnv] Default NoFPExcept SDNodeFlag to false

The NoFPExcept bit in SDNodeFlags currently defaults to true, unlike all
other such flags. This is a problem, because it implies that all code that
transforms SDNodes without copying flags can introduce a correctness bug,
not just a missed optimization.

This patch changes the default to false. This makes it necessary to move
setting the (No)FPExcept flag for constrained intrinsics from the
visitConstrainedIntrinsic routine to the generic visit routine at the
place where the other flags are set, or else the intersectFlagsWith
call would erase the NoFPExcept flag again.

In order to avoid making non-strict FP code worse, whenever
SelectionDAGISel::SelectCodeCommon matches on a set of original nodes
none of which can raise FP exceptions, it will preserve this property
on all result nodes generated, by setting the NoFPExcept flag on
those result nodes that would otherwise be considered as raising
an FP exception.

To check whether or not an SD node should be considered as raising
an FP exception, the following logic applies:

- For machine nodes, check the mayRaiseFPException property of
  the underlying MI instruction
- For regular nodes, check isStrictFPOpcode
- For target nodes, check a newly introduced isTargetStrictFPOpcode

The latter is implemented by reserving a range of target opcodes,
similarly to how memory opcodes are identified. (Note that there is a
bit of a quirk in identifying target nodes that are both memory nodes
and strict FP nodes. To simplify the logic, right now all target memory
nodes are automatically also considered strict FP nodes -- this could
be fixed by adding one more range.)

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D71841

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/ISDOpcodes.h
    llvm/include/llvm/CodeGen/SelectionDAGISel.h
    llvm/include/llvm/CodeGen/SelectionDAGNodes.h
    llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/lib/Target/SystemZ/SystemZISelLowering.h
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll
    llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
    llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 23918cb14d46..63b9b5ac0f85 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -937,11 +937,16 @@ namespace ISD {
     BUILTIN_OP_END
   };
 
+  /// FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations
+  /// which cannot raise FP exceptions should be less than this value.
+  /// Those that do must not be less than this value.
+  static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END+400;
+
   /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations
   /// which do not reference a specific memory location should be less than
   /// this value. Those that do must not be less than this value, and can
   /// be used with SelectionDAG::getMemIntrinsicNode.
-  static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+400;
+  static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+500;
 
   //===--------------------------------------------------------------------===//
   /// MemIndexedMode enum - This enum defines the load / store indexed

diff  --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 7babada12e6d..9874d782c782 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -310,6 +310,9 @@ class SelectionDAGISel : public MachineFunctionPass {
     return false;
   }
 
+  /// Return whether the node may raise an FP exception.
+  bool mayRaiseFPException(SDNode *Node) const;
+
   bool isOrEquivalentToAdd(const SDNode *N) const;
 
 private:

diff  --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index e18278f8cdc6..d81a4a8fd43f 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -387,7 +387,7 @@ struct SDNodeFlags {
         Exact(false), NoNaNs(false), NoInfs(false),
         NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
         AllowContract(false), ApproximateFuncs(false),
-        AllowReassociation(false), NoFPExcept(true) {}
+        AllowReassociation(false), NoFPExcept(false) {}
 
   /// Propagate the fast-math-flags from an IR FPMathOperator.
   void copyFMF(const FPMathOperator &FPMO) {
@@ -450,9 +450,9 @@ struct SDNodeFlags {
     setDefined();
     AllowReassociation = b;
   }
-  void setFPExcept(bool b) {
+  void setNoFPExcept(bool b) {
     setDefined();
-    NoFPExcept = !b;
+    NoFPExcept = b;
   }
 
   // These are accessors for each flag.
@@ -467,7 +467,7 @@ struct SDNodeFlags {
   bool hasAllowContract() const { return AllowContract; }
   bool hasApproximateFuncs() const { return ApproximateFuncs; }
   bool hasAllowReassociation() const { return AllowReassociation; }
-  bool hasFPExcept() const { return !NoFPExcept; }
+  bool hasNoFPExcept() const { return NoFPExcept; }
 
   bool isFast() const {
     return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
@@ -666,6 +666,15 @@ END_TWO_BYTE_PACK()
   /// \<target\>ISD namespace).
   bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
 
+  /// Test if this node has a target-specific opcode that may raise
+  /// FP exceptions (in the \<target\>ISD namespace and greater than
+  /// FIRST_TARGET_STRICTFP_OPCODE).  Note that all target memory
+  /// opcode are currently automatically considered to possibly raise
+  /// FP exceptions as well.
+  bool isTargetStrictFPOpcode() const {
+    return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
+  }
+
   /// Test if this node has a target-specific
   /// memory-referencing opcode (in the \<target\>ISD namespace and
   /// greater than FIRST_TARGET_MEMORY_OPCODE).

diff  --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c5095995ec2e..a0c8e83cd8a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -882,7 +882,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
     if (Flags.hasExact())
       MI->setFlag(MachineInstr::MIFlag::IsExact);
 
-    if (Flags.hasFPExcept())
+    if (MI->getDesc().mayRaiseFPException() && !Flags.hasNoFPExcept())
       MI->setFlag(MachineInstr::MIFlag::FPExcept);
   }
 

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f71ad8635584..b78b8e93435e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1108,6 +1108,15 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
         Node->intersectFlagsWith(IncomingFlags);
     }
   }
+  // Constrained FP intrinsics with fpexcept.ignore should also get
+  // the NoFPExcept flag.
+  if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I))
+    if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore)
+      if (SDNode *Node = getNodeForIRValue(&I)) {
+        SDNodeFlags Flags = Node->getFlags();
+        Flags.setNoFPExcept(true);
+        Node->setFlags(Flags);
+      }
 
   if (!I.isTerminator() && !HasTailCall &&
       !isStatepoint(&I)) // statepoints handle their exports internally
@@ -6972,12 +6981,6 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   SDVTList VTs = DAG.getVTList(ValueVTs);
   SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers);
 
-  if (FPI.getExceptionBehavior() != fp::ExceptionBehavior::ebIgnore) {
-    SDNodeFlags Flags;
-    Flags.setFPExcept(true);
-    Result->setFlags(Flags);
-  }
-
   assert(Result.getNode()->getNumValues() == 2);
   // See above -- chain is handled like for loads here.
   SDValue OutChain = Result.getValue(1);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 92e4160d69d5..b5018ec2111a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -547,8 +547,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
   if (getFlags().hasVectorReduction())
     OS << " vector-reduction";
 
-  if (getFlags().hasFPExcept())
-    OS << " fpexcept";
+  if (getFlags().hasNoFPExcept())
+    OS << " nofpexcept";
 
   if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
     if (!MN->memoperands_empty()) {

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 8317a11caa03..62e519438ca6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -3458,6 +3458,17 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
       if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
         Ops.push_back(InputGlue);
 
+      // Check whether any matched node could raise an FP exception.  Since all
+      // such nodes must have a chain, it suffices to check ChainNodesMatched.
+      // We need to perform this check before potentially modifying one of the
+      // nodes via MorphNode.
+      bool MayRaiseFPException = false;
+      for (auto *N : ChainNodesMatched)
+        if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) {
+          MayRaiseFPException = true;
+          break;
+        }
+
       // Create the node.
       MachineSDNode *Res = nullptr;
       bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
@@ -3489,6 +3500,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
                                             Ops, EmitNodeInfo));
       }
 
+      // Set the NoFPExcept flag when no original matched node could
+      // raise an FP exception, but the new node potentially might.
+      if (!MayRaiseFPException && mayRaiseFPException(Res)) {
+        SDNodeFlags Flags = Res->getFlags();
+        Flags.setNoFPExcept(true);
+        Res->setFlags(Flags);
+      }
+
       // If the node had chain/glue results, update our notion of the current
       // chain and glue.
       if (EmitNodeInfo & OPFL_GlueOutput) {
@@ -3644,6 +3663,21 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
   }
 }
 
+/// Return whether the node may raise an FP exception.
+bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const {
+  // For machine opcodes, consult the MCID flag.
+  if (N->isMachineOpcode()) {
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    return MCID.mayRaiseFPException();
+  }
+
+  // For ISD opcodes, only StrictFP opcodes may raise an FP
+  // exception.
+  if (N->isTargetOpcode())
+    return N->isTargetStrictFPOpcode();
+  return N->isStrictFPOpcode();
+}
+
 bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
   assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
   auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0bfd1b62db2d..1049a897c6fe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6190,8 +6190,10 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
       // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
       // never raise any exception.
       SDNodeFlags Flags;
-      Flags.setFPExcept(Node->getFlags().hasFPExcept());
+      Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
       Fast->setFlags(Flags);
+      Flags.setNoFPExcept(true);
+      Slow->setFlags(Flags);
     } else {
       SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
       Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);

diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 0ac07a12ab71..defcaa6eb6eb 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -58,8 +58,7 @@ enum NodeType : unsigned {
   ICMP,
 
   // Floating-point comparisons.  The two operands are the values to compare.
-  // Regular and strict (quiet and signaling) versions.
-  FCMP, STRICT_FCMP, STRICT_FCMPS,
+  FCMP,
 
   // Test under mask.  The first operand is ANDed with the second operand
   // and the condition codes are set on the result.  The third operand is
@@ -249,10 +248,9 @@ enum NodeType : unsigned {
   // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
   // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
   // greater than" and VFCMPHE for "ordered and greater than or equal to".
-  // Regular and strict (quiet and signaling) versions.
-  VFCMPE, STRICT_VFCMPE, STRICT_VFCMPES,
-  VFCMPH, STRICT_VFCMPH, STRICT_VFCMPHS,
-  VFCMPHE, STRICT_VFCMPHE, STRICT_VFCMPHES,
+  VFCMPE,
+  VFCMPH,
+  VFCMPHE,
 
   // Likewise, but also set the condition codes on the result.
   VFCMPES,
@@ -263,12 +261,12 @@ enum NodeType : unsigned {
   VFTCI,
 
   // Extend the even f32 elements of vector operand 0 to produce a vector
-  // of f64 elements.  Regular and strict versions.
-  VEXTEND, STRICT_VEXTEND,
+  // of f64 elements.
+  VEXTEND,
 
   // Round the f64 elements of vector operand 0 to f32s and store them in the
-  // even elements of the result.  Regular and strict versions.
-  VROUND, STRICT_VROUND,
+  // even elements of the result.
+  VROUND,
 
   // AND the two vector operands together and set CC based on the result.
   VTM,
@@ -292,6 +290,24 @@ enum NodeType : unsigned {
   // Operand 1: the bit mask
   TDC,
 
+  // Strict variants of scalar floating-point comparisons.
+  // Quiet and signaling versions.
+  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+  STRICT_FCMPS,
+
+  // Strict variants of vector floating-point comparisons.
+  // Quiet and signaling versions.
+  STRICT_VFCMPE,
+  STRICT_VFCMPH,
+  STRICT_VFCMPHE,
+  STRICT_VFCMPES,
+  STRICT_VFCMPHS,
+  STRICT_VFCMPHES,
+
+  // Strict variants of VEXTEND and VROUND.
+  STRICT_VEXTEND,
+  STRICT_VROUND,
+
   // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
   // ATOMIC_LOAD_<op>.
   //

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 16b076e85af8..f48bacd13919 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -79,9 +79,6 @@ namespace llvm {
       /// X86 compare and logical compare instructions.
       CMP, COMI, UCOMI,
 
-      /// X86 strict FP compare instructions.
-      STRICT_FCMP, STRICT_FCMPS,
-
       /// X86 bit-test instructions.
       BT,
 
@@ -325,7 +322,6 @@ namespace llvm {
 
       // Vector packed double/float comparison.
       CMPP,
-      STRICT_CMPP,
 
       // Vector integer comparisons.
       PCMPEQ, PCMPGT,
@@ -338,7 +334,6 @@ namespace llvm {
       /// Vector comparison generating mask bits for fp and
       /// integer signed and unsigned data types.
       CMPM,
-      STRICT_CMPM,
       // Vector comparison with SAE for FP values
       CMPM_SAE,
 
@@ -506,7 +501,6 @@ namespace llvm {
 
       // Vector float/double to signed/unsigned integer with truncation.
       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
-      STRICT_CVTTP2SI, STRICT_CVTTP2UI,
       // Scalar float/double to signed/unsigned integer with truncation.
       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 
@@ -605,6 +599,20 @@ namespace llvm {
       // For avx512-vp2intersect
       VP2INTERSECT,
 
+      /// X86 strict FP compare instructions.
+      STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+      STRICT_FCMPS,
+
+      // Vector packed double/float comparison.
+      STRICT_CMPP,
+
+      /// Vector comparison generating mask bits for fp and
+      /// integer signed and unsigned data types.
+      STRICT_CMPM,
+
+      // Vector float/double to signed/unsigned integer with truncation.
+      STRICT_CVTTP2SI, STRICT_CVTTP2UI,
+
       // Compare and swap.
       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LCMPXCHG8_DAG,

diff  --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll
index c2228046d607..22ba8fab86ab 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll
@@ -4,7 +4,7 @@ define i32 @f20u(double %x) #0 {
 ; CHECK-LABEL: name: f20u
 ; CHECK: liveins: $xmm0
 ; CHECK: [[COPY:%[0-9]+]]:fr64 = COPY $xmm0
-; CHECK: [[CVTTSD2SI64rr:%[0-9]+]]:gr64 = CVTTSD2SI64rr [[COPY]], implicit $mxcsr
+; CHECK: [[CVTTSD2SI64rr:%[0-9]+]]:gr64 = fpexcept CVTTSD2SI64rr [[COPY]], implicit $mxcsr
 ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY [[CVTTSD2SI64rr]].sub_32bit
 ; CHECK: $eax = COPY [[COPY1]]
 ; CHECK: RET 0, $eax

diff  --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
index 5df3d8f7a4a8..8fc496a05f1d 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
@@ -29,14 +29,14 @@ entry:
 ; CHECK-LABEL: name: f20u64
 ; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
 ; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool)
-; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
+; CHECK: fpexcept COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
 ; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD
 ; CHECK: JCC_1
 ; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[MOVSDrm_alt1]], {{.*}}, [[FsFLD0SD]], {{.*}}
-; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr
+; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = fpexcept SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr
 ; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[SUBSDrr]] :: (store 8 into %stack.0)
 ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags
-; CHECK: [[LD_Fp64m:%[0-9]+]]:rfp64 = LD_Fp64m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0)
+; CHECK: [[LD_Fp64m:%[0-9]+]]:rfp64 = fpexcept LD_Fp64m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0)
 ; CHECK: FNSTCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit $fpcw :: (store 2 into %stack.1)
 ; CHECK: [[MOVZX32rm16_:%[0-9]+]]:gr32 = MOVZX32rm16 %stack.1, 1, $noreg, 0, $noreg :: (load 2 from %stack.1)
 ; CHECK: [[OR32ri:%[0-9]+]]:gr32 = OR32ri killed [[MOVZX32rm16_]], 3072, implicit-def $eflags
@@ -59,7 +59,7 @@ entry:
 define i8 @f20s8(double %x) #0 {
 entry:
 ; CHECK-LABEL: name: f20s8
-; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16)
+; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = fpexcept CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16)
 ; CHECK: [[COPY:%[0-9]+]]:gr32_abcd = COPY [[CVTTSD2SIrm]]
 ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit
 ; CHECK: $al = COPY [[COPY1]]
@@ -71,7 +71,7 @@ entry:
 define i16 @f20s16(double %x) #0 {
 entry:
 ; CHECK-LABEL: name: f20s16
-; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16)
+; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = fpexcept CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16)
 ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY [[CVTTSD2SIrm]].sub_16bit
 ; CHECK: $ax = COPY [[COPY]]
 ; CHECK: RET 0, $ax
@@ -84,15 +84,15 @@ entry:
 ; CHECK-LABEL: name: f20u
 ; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
 ; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool)
-; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
+; CHECK: fpexcept COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
 ; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD
 ; CHECK: JCC_1
 ; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[MOVSDrm_alt1]], {{.*}}, [[FsFLD0SD]], {{.*}}
 ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags
 ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
 ; CHECK: [[SHL32ri:%[0-9]+]]:gr32 = SHL32ri [[MOVZX32rr8_]], 31, implicit-def dead $eflags
-; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr
-; CHECK: [[CVTTSD2SIrr:%[0-9]+]]:gr32 = CVTTSD2SIrr killed [[SUBSDrr]], implicit $mxcsr
+; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = fpexcept SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr
+; CHECK: [[CVTTSD2SIrr:%[0-9]+]]:gr32 = fpexcept CVTTSD2SIrr killed [[SUBSDrr]], implicit $mxcsr
 ; CHECK: [[XOR32rr:%[0-9]+]]:gr32 = XOR32rr [[CVTTSD2SIrr]], killed [[SHL32ri]], implicit-def dead $eflags
 ; CHECK: $eax = COPY [[XOR32rr]]
 ; CHECK: RET 0, $eax

diff  --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll
index 6b9d84be4070..a8d1b53c37c9 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll
@@ -3,7 +3,7 @@
 define <1 x float> @constrained_vector_fadd_v1f32() #0 {
 ; CHECK-LABEL: name: constrained_vector_fadd_v1f32
 ; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
-; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
+; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = fpexcept ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
 ; CHECK: $xmm0 = COPY [[ADDSSrm]]
 ; CHECK: RET 0, $xmm0
 entry:
@@ -15,9 +15,9 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 {
 ; CHECK-LABEL: name: constrained_vector_fadd_v3f32
 ; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS
 ; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
-; CHECK: [[ADDSSrr:%[0-9]+]]:fr32 = ADDSSrr [[MOVSSrm_alt]], killed [[FsFLD0SS]], implicit $mxcsr
-; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
-; CHECK: [[ADDSSrm1:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
+; CHECK: [[ADDSSrr:%[0-9]+]]:fr32 = fpexcept ADDSSrr [[MOVSSrm_alt]], killed [[FsFLD0SS]], implicit $mxcsr
+; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = fpexcept ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
+; CHECK: [[ADDSSrm1:%[0-9]+]]:fr32 = fpexcept ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
 ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY [[ADDSSrm1]]
 ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY [[ADDSSrm]]
 ; CHECK: [[UNPCKLPSrr:%[0-9]+]]:vr128 = UNPCKLPSrr [[COPY1]], killed [[COPY]]
@@ -38,8 +38,8 @@ entry:
 define <4 x double> @constrained_vector_fadd_v4f64() #0 {
 ; CHECK-LABEL: name: constrained_vector_fadd_v4f64
 ; CHECK: [[MOVAPDrm:%[0-9]+]]:vr128 = MOVAPDrm $rip, 1, $noreg, %const.0, $noreg :: (load 16 from constant-pool)
-; CHECK: [[ADDPDrm:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 16 from constant-pool)
-; CHECK: [[ADDPDrm1:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 16 from constant-pool)
+; CHECK: [[ADDPDrm:%[0-9]+]]:vr128 = fpexcept ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 16 from constant-pool)
+; CHECK: [[ADDPDrm1:%[0-9]+]]:vr128 = fpexcept ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 16 from constant-pool)
 ; CHECK: $xmm0 = COPY [[ADDPDrm]]
 ; CHECK: $xmm1 = COPY [[ADDPDrm1]]
 ; CHECK: RET 0, $xmm0, $xmm1


        


More information about the llvm-commits mailing list