[llvm] 92e96c7 - [X86][GISel] Add DU chain lookups for LOAD & STORE (#87453)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 16 04:06:20 PDT 2024


Author: Malay Sanghi
Date: 2024-04-16T13:06:17+02:00
New Revision: 92e96c7bbacbb477265c7e5ff6c49a6de5d4ee69

URL: https://github.com/llvm/llvm-project/commit/92e96c7bbacbb477265c7e5ff6c49a6de5d4ee69
DIFF: https://github.com/llvm/llvm-project/commit/92e96c7bbacbb477265c7e5ff6c49a6de5d4ee69.diff

LOG: [X86][GISel] Add DU chain lookups for LOAD & STORE (#87453)

For G_LOAD and G_STORE we want this information during regbankselect.
Today we treat load dest as integer and insert converts.

---------

Co-authored-by: Evgenii Kudriashov <evgenii.kudriashov at intel.com>

Added: 
    llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/Utils.h
    llvm/lib/CodeGen/GlobalISel/Utils.cpp
    llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
    llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
    llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
    llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
    llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
    llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
    llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
    llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 807cec3c177d9f..c4174cee5e10c6 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -555,5 +555,9 @@ void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
 /// debug users of \p MI by writing the effect of \p MI in a DIExpression.
 void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI);
 
+/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
+/// having only floating-point operands.
+bool isPreISelGenericFloatingPointOpcode(unsigned Opc);
+
 } // End namespace llvm.
 #endif

diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index c3bc3203b63605..ae43e9ccf6112d 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1665,3 +1665,47 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) {
     }
   }
 }
+
+bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::G_FABS:
+  case TargetOpcode::G_FADD:
+  case TargetOpcode::G_FCANONICALIZE:
+  case TargetOpcode::G_FCEIL:
+  case TargetOpcode::G_FCONSTANT:
+  case TargetOpcode::G_FCOPYSIGN:
+  case TargetOpcode::G_FCOS:
+  case TargetOpcode::G_FDIV:
+  case TargetOpcode::G_FEXP2:
+  case TargetOpcode::G_FEXP:
+  case TargetOpcode::G_FFLOOR:
+  case TargetOpcode::G_FLOG10:
+  case TargetOpcode::G_FLOG2:
+  case TargetOpcode::G_FLOG:
+  case TargetOpcode::G_FMA:
+  case TargetOpcode::G_FMAD:
+  case TargetOpcode::G_FMAXIMUM:
+  case TargetOpcode::G_FMAXNUM:
+  case TargetOpcode::G_FMAXNUM_IEEE:
+  case TargetOpcode::G_FMINIMUM:
+  case TargetOpcode::G_FMINNUM:
+  case TargetOpcode::G_FMINNUM_IEEE:
+  case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FNEARBYINT:
+  case TargetOpcode::G_FNEG:
+  case TargetOpcode::G_FPEXT:
+  case TargetOpcode::G_FPOW:
+  case TargetOpcode::G_FPTRUNC:
+  case TargetOpcode::G_FREM:
+  case TargetOpcode::G_FRINT:
+  case TargetOpcode::G_FSIN:
+  case TargetOpcode::G_FSQRT:
+  case TargetOpcode::G_FSUB:
+  case TargetOpcode::G_INTRINSIC_ROUND:
+  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+  case TargetOpcode::G_INTRINSIC_TRUNC:
+    return true;
+  default:
+    return false;
+  }
+}

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index d39de770eaf16e..d5c4ce1888e78c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -424,43 +424,6 @@ void AArch64RegisterBankInfo::applyMappingImpl(
   }
 }
 
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
-  switch (Opc) {
-  case TargetOpcode::G_FADD:
-  case TargetOpcode::G_FSUB:
-  case TargetOpcode::G_FMUL:
-  case TargetOpcode::G_FMA:
-  case TargetOpcode::G_FDIV:
-  case TargetOpcode::G_FCONSTANT:
-  case TargetOpcode::G_FPEXT:
-  case TargetOpcode::G_FPTRUNC:
-  case TargetOpcode::G_FCEIL:
-  case TargetOpcode::G_FFLOOR:
-  case TargetOpcode::G_FNEARBYINT:
-  case TargetOpcode::G_FNEG:
-  case TargetOpcode::G_FCOS:
-  case TargetOpcode::G_FSIN:
-  case TargetOpcode::G_FLOG10:
-  case TargetOpcode::G_FLOG:
-  case TargetOpcode::G_FLOG2:
-  case TargetOpcode::G_FSQRT:
-  case TargetOpcode::G_FABS:
-  case TargetOpcode::G_FEXP:
-  case TargetOpcode::G_FRINT:
-  case TargetOpcode::G_INTRINSIC_TRUNC:
-  case TargetOpcode::G_INTRINSIC_ROUND:
-  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
-  case TargetOpcode::G_FMAXNUM:
-  case TargetOpcode::G_FMINNUM:
-  case TargetOpcode::G_FMAXIMUM:
-  case TargetOpcode::G_FMINIMUM:
-    return true;
-  }
-  return false;
-}
-
 const RegisterBankInfo::InstructionMapping &
 AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
     const MachineInstr &MI) const {

diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 6af1fd8c88e570..62b58cba9f24a4 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -104,26 +104,6 @@ MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   }
 }
 
-// Instructions where all register operands are floating point.
-static bool isFloatingPointOpcode(unsigned Opc) {
-  switch (Opc) {
-  case TargetOpcode::G_FCONSTANT:
-  case TargetOpcode::G_FADD:
-  case TargetOpcode::G_FSUB:
-  case TargetOpcode::G_FMUL:
-  case TargetOpcode::G_FDIV:
-  case TargetOpcode::G_FABS:
-  case TargetOpcode::G_FSQRT:
-  case TargetOpcode::G_FCEIL:
-  case TargetOpcode::G_FFLOOR:
-  case TargetOpcode::G_FPEXT:
-  case TargetOpcode::G_FPTRUNC:
-    return true;
-  default:
-    return false;
-  }
-}
-
 // Instructions where use operands are floating point registers.
 // Def operands are general purpose.
 static bool isFloatingPointOpcodeUse(unsigned Opc) {
@@ -133,7 +113,7 @@ static bool isFloatingPointOpcodeUse(unsigned Opc) {
   case TargetOpcode::G_FCMP:
     return true;
   default:
-    return isFloatingPointOpcode(Opc);
+    return isPreISelGenericFloatingPointOpcode(Opc);
   }
 }
 
@@ -145,7 +125,7 @@ static bool isFloatingPointOpcodeDef(unsigned Opc) {
   case TargetOpcode::G_UITOFP:
     return true;
   default:
-    return isFloatingPointOpcode(Opc);
+    return isPreISelGenericFloatingPointOpcode(Opc);
   }
 }
 

diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index 6aeef145e3078f..125a49de7b27d4 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -13,6 +13,7 @@
 #include "PPCRegisterBankInfo.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
@@ -239,44 +240,6 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   return getInstructionMapping(MappingID, Cost, OperandsMapping, NumOperands);
 }
 
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
-  switch (Opc) {
-  case TargetOpcode::G_FADD:
-  case TargetOpcode::G_FSUB:
-  case TargetOpcode::G_FMUL:
-  case TargetOpcode::G_FMA:
-  case TargetOpcode::G_FDIV:
-  case TargetOpcode::G_FCONSTANT:
-  case TargetOpcode::G_FPEXT:
-  case TargetOpcode::G_FPTRUNC:
-  case TargetOpcode::G_FCEIL:
-  case TargetOpcode::G_FFLOOR:
-  case TargetOpcode::G_FNEARBYINT:
-  case TargetOpcode::G_FNEG:
-  case TargetOpcode::G_FCOS:
-  case TargetOpcode::G_FSIN:
-  case TargetOpcode::G_FLOG10:
-  case TargetOpcode::G_FLOG:
-  case TargetOpcode::G_FLOG2:
-  case TargetOpcode::G_FSQRT:
-  case TargetOpcode::G_FABS:
-  case TargetOpcode::G_FEXP:
-  case TargetOpcode::G_FRINT:
-  case TargetOpcode::G_INTRINSIC_TRUNC:
-  case TargetOpcode::G_INTRINSIC_ROUND:
-  case TargetOpcode::G_FMAXNUM:
-  case TargetOpcode::G_FMINNUM:
-  case TargetOpcode::G_FMAXIMUM:
-  case TargetOpcode::G_FMINIMUM:
-    return true;
-  }
-  return false;
-}
-
 /// \returns true if a given intrinsic \p ID only uses and defines FPRs.
 static bool isFPIntrinsic(unsigned ID) {
   // TODO: Add more intrinsics.

diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 86e44343b50865..cc534f29685f25 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -154,46 +154,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
   return &RISCV::ValueMappings[Idx];
 }
 
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
-  switch (Opc) {
-  case TargetOpcode::G_FADD:
-  case TargetOpcode::G_FSUB:
-  case TargetOpcode::G_FMUL:
-  case TargetOpcode::G_FMA:
-  case TargetOpcode::G_FDIV:
-  case TargetOpcode::G_FCONSTANT:
-  case TargetOpcode::G_FPEXT:
-  case TargetOpcode::G_FPTRUNC:
-  case TargetOpcode::G_FCEIL:
-  case TargetOpcode::G_FFLOOR:
-  case TargetOpcode::G_FNEARBYINT:
-  case TargetOpcode::G_FNEG:
-  case TargetOpcode::G_FCOPYSIGN:
-  case TargetOpcode::G_FCOS:
-  case TargetOpcode::G_FSIN:
-  case TargetOpcode::G_FLOG10:
-  case TargetOpcode::G_FLOG:
-  case TargetOpcode::G_FLOG2:
-  case TargetOpcode::G_FSQRT:
-  case TargetOpcode::G_FABS:
-  case TargetOpcode::G_FEXP:
-  case TargetOpcode::G_FRINT:
-  case TargetOpcode::G_INTRINSIC_TRUNC:
-  case TargetOpcode::G_INTRINSIC_ROUND:
-  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
-  case TargetOpcode::G_FMAXNUM:
-  case TargetOpcode::G_FMINNUM:
-  case TargetOpcode::G_FMAXIMUM:
-  case TargetOpcode::G_FMINIMUM:
-    return true;
-  }
-  return false;
-}
-
 // TODO: Make this more like AArch64?
 bool RISCVRegisterBankInfo::hasFPConstraints(
     const MachineInstr &MI, const MachineRegisterInfo &MRI,

diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index e7c9e60ba95f16..9e85424e76e620 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -13,10 +13,13 @@
 #include "X86RegisterBankInfo.h"
 #include "X86InstrInfo.h"
 #include "X86Subtarget.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterBank.h"
 #include "llvm/CodeGen/RegisterBankInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/IntrinsicsX86.h"
 
 #define GET_TARGET_REGBANK_IMPL
 #include "X86GenRegisterBank.inc"
@@ -68,6 +71,98 @@ X86RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   llvm_unreachable("Unsupported register kind yet.");
 }
 
+// \returns true if a given intrinsic only uses and defines FPRs.
+static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
+                          const MachineInstr &MI) {
+  // TODO: Add more intrinsics.
+  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+  default:
+    return false;
+  // SSE1
+  case Intrinsic::x86_sse_rcp_ss:
+  case Intrinsic::x86_sse_rcp_ps:
+  case Intrinsic::x86_sse_rsqrt_ss:
+  case Intrinsic::x86_sse_rsqrt_ps:
+  case Intrinsic::x86_sse_min_ss:
+  case Intrinsic::x86_sse_min_ps:
+  case Intrinsic::x86_sse_max_ss:
+  case Intrinsic::x86_sse_max_ps:
+    return true;
+  }
+  return false;
+}
+
+bool X86RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
+                                           const MachineRegisterInfo &MRI,
+                                           const TargetRegisterInfo &TRI,
+                                           unsigned Depth) const {
+  unsigned Op = MI.getOpcode();
+  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
+    return true;
+
+  // Do we have an explicit floating point instruction?
+  if (isPreISelGenericFloatingPointOpcode(Op))
+    return true;
+
+  // No. Check if we have a copy-like instruction. If we do, then we could
+  // still be fed by floating point instructions.
+  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
+      !isPreISelGenericOptimizationHint(Op))
+    return false;
+
+  // Check if we already know the register bank.
+  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
+  if (RB == &getRegBank(X86::PSRRegBankID))
+    return true;
+  if (RB == &getRegBank(X86::GPRRegBankID))
+    return false;
+
+  // We don't know anything.
+  //
+  // If we have a phi, we may be able to infer that it will be assigned a fp
+  // type based off of its inputs.
+  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+    return false;
+
+  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
+    return Op.isReg() &&
+           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
+  });
+}
+
+bool X86RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+                                     const MachineRegisterInfo &MRI,
+                                     const TargetRegisterInfo &TRI,
+                                     unsigned Depth) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FCMP:
+  case TargetOpcode::G_LROUND:
+  case TargetOpcode::G_LLROUND:
+  case TargetOpcode::G_INTRINSIC_TRUNC:
+  case TargetOpcode::G_INTRINSIC_ROUND:
+    return true;
+  default:
+    break;
+  }
+  return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
+bool X86RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+                                        const MachineRegisterInfo &MRI,
+                                        const TargetRegisterInfo &TRI,
+                                        unsigned Depth) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_SITOFP:
+  case TargetOpcode::G_UITOFP:
+    return true;
+  default:
+    break;
+  }
+  return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
 X86GenRegisterBankInfo::PartialMappingIdx
 X86GenRegisterBankInfo::getPartialMappingIdx(const MachineInstr &MI,
                                              const LLT &Ty, bool isFP) {
@@ -180,11 +275,13 @@ X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI,
 const RegisterBankInfo::InstructionMapping &
 X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const MachineFunction &MF = *MI.getParent()->getParent();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   unsigned Opc = MI.getOpcode();
 
-  // Try the default logic for non-generic instructions that are either copies
-  // or already have some operands assigned to banks.
+  // Try the default logic for non-generic instructions that are either
+  // copies or already have some operands assigned to banks.
   if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) {
     const InstructionMapping &Mapping = getInstrMappingImpl(MI);
     if (Mapping.isValid())
@@ -221,13 +318,14 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case TargetOpcode::G_FPEXT:
   case TargetOpcode::G_FPTRUNC:
   case TargetOpcode::G_FCONSTANT:
-    // Instruction having only floating-point operands (all scalars in VECRReg)
+    // Instruction having only floating-point operands (all scalars in
+    // VECRReg)
     getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
     break;
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_FPTOSI: {
-    // Some of the floating-point instructions have mixed GPR and FP operands:
-    // fine-tune the computed mapping.
+    // Some of the floating-point instructions have mixed GPR and FP
+    // operands: fine-tune the computed mapping.
     auto &Op0 = MI.getOperand(0);
     auto &Op1 = MI.getOperand(1);
     const LLT Ty0 = MRI.getType(Op0.getReg());
@@ -271,9 +369,36 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
 
     getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ isFPTrunc || isFPAnyExt,
                                OpRegBankIdx);
-  } break;
+    break;
+  }
+  case TargetOpcode::G_LOAD: {
+    // Check if that load feeds fp instructions.
+    // In that case, we want the default mapping to be on FPR
+    // instead of blind map every scalar to GPR.
+    bool IsFP = any_of(MRI.use_nodbg_instructions(cast<GLoad>(MI).getDstReg()),
+                       [&](const MachineInstr &UseMI) {
+                         // If we have at least one direct use in a FP
+                         // instruction, assume this was a floating point load
+                         // in the IR. If it was not, we would have had a
+                         // bitcast before reaching that instruction.
+                         return onlyUsesFP(UseMI, MRI, TRI);
+                       });
+    getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx);
+    break;
+  }
+  case TargetOpcode::G_STORE: {
+    // Check if that store is fed by fp instructions.
+    Register VReg = cast<GStore>(MI).getValueReg();
+    if (!VReg)
+      break;
+    MachineInstr *DefMI = MRI.getVRegDef(VReg);
+    bool IsFP = onlyDefinesFP(*DefMI, MRI, TRI);
+    getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx);
+    break;
+  }
   default:
-    // Track the bank of each register, use NotFP mapping (all scalars in GPRs)
+    // Track the bank of each register, use NotFP mapping (all scalars in
+    // GPRs)
     getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ false, OpRegBankIdx);
     break;
   }

diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
index 989c5956ad5917..8f38e717e36b0b 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
@@ -62,6 +62,22 @@ class X86RegisterBankInfo final : public X86GenRegisterBankInfo {
                        const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx,
                        SmallVectorImpl<const ValueMapping *> &OpdsMapping);
 
+  // Maximum recursion depth for hasFPConstraints.
+  const unsigned MaxFPRSearchDepth = 2;
+
+  /// \returns true if \p MI only uses and defines FPRs.
+  bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                        const TargetRegisterInfo &TRI,
+                        unsigned Depth = 0) const;
+
+  /// \returns true if \p MI only uses FPRs.
+  bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                  const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
+  /// \returns true if \p MI only defines FPRs.
+  bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
 public:
   X86RegisterBankInfo(const TargetRegisterInfo &TRI);
 

diff --git a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
index a9b2037e9947a1..8d2ee3c50f215a 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
@@ -10,27 +10,22 @@ define void @test_float(ptr %a , float %b) {
 ; CHECK64_SMALL:       # %bb.0: # %entry
 ; CHECK64_SMALL-NEXT:    movss {{.*#+}} xmm1 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK64_SMALL-NEXT:    addss %xmm0, %xmm1
-; CHECK64_SMALL-NEXT:    movd %xmm1, %eax
-; CHECK64_SMALL-NEXT:    movl %eax, (%rdi)
+; CHECK64_SMALL-NEXT:    movss %xmm1, (%rdi)
 ; CHECK64_SMALL-NEXT:    retq
 ;
 ; CHECK64_LARGE-LABEL: test_float:
 ; CHECK64_LARGE:       # %bb.0: # %entry
 ; CHECK64_LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
 ; CHECK64_LARGE-NEXT:    addss (%rax), %xmm0
-; CHECK64_LARGE-NEXT:    movd %xmm0, %eax
-; CHECK64_LARGE-NEXT:    movl %eax, (%rdi)
+; CHECK64_LARGE-NEXT:    movss %xmm0, (%rdi)
 ; CHECK64_LARGE-NEXT:    retq
 ;
 ; CHECK32-LABEL: test_float:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK32-NEXT:    movss {{.*#+}} xmm0 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK32-NEXT:    movd %ecx, %xmm1
-; CHECK32-NEXT:    addss %xmm0, %xmm1
-; CHECK32-NEXT:    movd %xmm1, %ecx
-; CHECK32-NEXT:    movl %ecx, (%eax)
+; CHECK32-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT:    movss %xmm0, (%eax)
 ; CHECK32-NEXT:    retl
 entry:
   %aa = fadd float 5.500000e+00, %b

diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll
new file mode 100644
index 00000000000000..3388af605d9691
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse -global-isel -stop-after=regbankselect | FileCheck %s
+
+define void @test_x86_sse_max_ps(ptr %p1, ptr %p2) {
+  ; CHECK-LABEL: name: test_x86_sse_max_ps
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.max.ps), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+  ; CHECK-NEXT:   G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+  ; CHECK-NEXT:   RET 0
+  %a0 = load <4 x float>, ptr %p1, align 16
+  %a1 = load <4 x float>, ptr %p2, align 16
+  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  store <4 x float> %res, ptr %p1
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_max_ss(ptr %p1, ptr %p2) {
+  ; CHECK-LABEL: name: test_x86_sse_max_ss
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.max.ss), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+  ; CHECK-NEXT:   G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+  ; CHECK-NEXT:   RET 0
+  %a0 = load <4 x float>, ptr %p1, align 16
+  %a1 = load <4 x float>, ptr %p2, align 16
+  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  store <4 x float> %res, ptr %p1
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_min_ps(ptr %p1, ptr %p2) {
+  ; CHECK-LABEL: name: test_x86_sse_min_ps
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.min.ps), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+  ; CHECK-NEXT:   G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+  ; CHECK-NEXT:   RET 0
+  %a0 = load <4 x float>, ptr %p1, align 16
+  %a1 = load <4 x float>, ptr %p2, align 16
+  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  store <4 x float> %res, ptr %p1
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_min_ss(ptr %p1, ptr %p2) {
+  ; CHECK-LABEL: name: test_x86_sse_min_ss
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.min.ss), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+  ; CHECK-NEXT:   G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+  ; CHECK-NEXT:   RET 0
+  %a0 = load <4 x float>, ptr %p1, align 16
+  %a1 = load <4 x float>, ptr %p2, align 16
+  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  store <4 x float> %res, ptr %p1
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_rcp_ps(ptr %p1, ptr %p2) {
+  ; CHECK-LABEL: name: test_x86_sse_rcp_ps
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rcp.ps), [[LOAD1]](<4 x s32>)
+  ; CHECK-NEXT:   G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+  ; CHECK-NEXT:   RET 0
+  %a0 = load <4 x float>, ptr %p1, align 16
+  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  store <4 x float> %res, ptr %p1
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_rcp_ss(ptr %p1, ptr %p2) {
+  ; CHECK-LABEL: name: test_x86_sse_rcp_ss
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rcp.ss), [[LOAD1]](<4 x s32>)
+  ; CHECK-NEXT:   G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+  ; CHECK-NEXT:   RET 0
+  %a0 = load <4 x float>, ptr %p1, align 16
+  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  store <4 x float> %res, ptr %p1
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_rsqrt_ps(ptr %p1, ptr %p2) {
+  ; CHECK-LABEL: name: test_x86_sse_rsqrt_ps
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rsqrt.ps), [[LOAD1]](<4 x s32>)
+  ; CHECK-NEXT:   G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+  ; CHECK-NEXT:   RET 0
+  %a0 = load <4 x float>, ptr %p1, align 16
+  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  store <4 x float> %res, ptr %p1
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_rsqrt_ss(ptr %p1, ptr %p2) {
+  ; CHECK-LABEL: name: test_x86_sse_rsqrt_ss
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rsqrt.ss), [[LOAD1]](<4 x s32>)
+  ; CHECK-NEXT:   G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1)
+  ; CHECK-NEXT:   RET 0
+  %a0 = load <4 x float>, ptr %p1, align 16
+  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  store <4 x float> %res, ptr %p1
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
index d09db0f2474c96..99d458a183a9bd 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
@@ -142,7 +142,7 @@ define float @f4(float %val) {
   ; X86-LABEL: name: f4
   ; X86: bb.1 (%ir-block.0):
   ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86-NEXT:   [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0)
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:psr(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0)
   ; X86-NEXT:   $fp0 = COPY [[LOAD]](s32)
   ; X86-NEXT:   RET 0, implicit $fp0
   ;
@@ -187,13 +187,10 @@ define void @f5(ptr %a, ptr %b) {
   ; X64-NEXT: {{  $}}
   ; X64-NEXT:   [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi
   ; X64-NEXT:   [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi
-  ; X64-NEXT:   [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.a)
-  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.b)
-  ; X64-NEXT:   [[COPY2:%[0-9]+]]:psr(s64) = COPY [[LOAD]](s64)
-  ; X64-NEXT:   [[COPY3:%[0-9]+]]:psr(s64) = COPY [[LOAD1]](s64)
-  ; X64-NEXT:   [[FADD:%[0-9]+]]:psr(s64) = G_FADD [[COPY2]], [[COPY3]]
-  ; X64-NEXT:   [[COPY4:%[0-9]+]]:gpr(s64) = COPY [[FADD]](s64)
-  ; X64-NEXT:   G_STORE [[COPY4]](s64), [[COPY]](p0) :: (store (s64) into %ir.a)
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:psr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.a)
+  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:psr(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.b)
+  ; X64-NEXT:   [[FADD:%[0-9]+]]:psr(s64) = G_FADD [[LOAD]], [[LOAD1]]
+  ; X64-NEXT:   G_STORE [[FADD]](s64), [[COPY]](p0) :: (store (s64) into %ir.a)
   ; X64-NEXT:   RET 0
   %load1 = load double, ptr %a, align 8
   %load2 = load double, ptr %b, align 8
@@ -210,11 +207,9 @@ define void @f6(ptr %0, ptr %1) {
   ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0
   ; X86-NEXT:   [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
   ; X86-NEXT:   [[C:%[0-9]+]]:psr(s32) = G_FCONSTANT float 2.000000e+01
-  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.0)
-  ; X86-NEXT:   [[COPY:%[0-9]+]]:psr(s32) = COPY [[LOAD2]](s32)
-  ; X86-NEXT:   [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[COPY]], [[C]]
-  ; X86-NEXT:   [[COPY1:%[0-9]+]]:gpr(s32) = COPY [[FADD]](s32)
-  ; X86-NEXT:   G_STORE [[COPY1]](s32), [[LOAD1]](p0) :: (store (s32) into %ir.1)
+  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:psr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.0)
+  ; X86-NEXT:   [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[LOAD2]], [[C]]
+  ; X86-NEXT:   G_STORE [[FADD]](s32), [[LOAD1]](p0) :: (store (s32) into %ir.1)
   ; X86-NEXT:   RET 0
   ;
   ; X64-LABEL: name: f6
@@ -224,11 +219,9 @@ define void @f6(ptr %0, ptr %1) {
   ; X64-NEXT:   [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi
   ; X64-NEXT:   [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi
   ; X64-NEXT:   [[C:%[0-9]+]]:psr(s32) = G_FCONSTANT float 2.000000e+01
-  ; X64-NEXT:   [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.0)
-  ; X64-NEXT:   [[COPY2:%[0-9]+]]:psr(s32) = COPY [[LOAD]](s32)
-  ; X64-NEXT:   [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[COPY2]], [[C]]
-  ; X64-NEXT:   [[COPY3:%[0-9]+]]:gpr(s32) = COPY [[FADD]](s32)
-  ; X64-NEXT:   G_STORE [[COPY3]](s32), [[COPY1]](p0) :: (store (s32) into %ir.1)
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:psr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.0)
+  ; X64-NEXT:   [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[LOAD]], [[C]]
+  ; X64-NEXT:   G_STORE [[FADD]](s32), [[COPY1]](p0) :: (store (s32) into %ir.1)
   ; X64-NEXT:   RET 0
   %load1 = load float, ptr %0
   %add = fadd float %load1, 20.0


        


More information about the llvm-commits mailing list