[llvm] 9630b32 - [MIPS][float] Fixed SingleFloat codegen on N32/N64 targets (#140575)

Sat Sep 27 19:49:44 PDT 2025

Author: Davide Mor
Date: 2025-09-28T10:49:40+08:00
New Revision: 9630b321a50a3712ca092a53a4a4c7bea94b3af2

URL: https://github.com/llvm/llvm-project/commit/9630b321a50a3712ca092a53a4a4c7bea94b3af2
DIFF: https://github.com/llvm/llvm-project/commit/9630b321a50a3712ca092a53a4a4c7bea94b3af2.diff

LOG: [MIPS][float] Fixed SingleFloat codegen on N32/N64 targets (#140575)

This patch aims at making the combination of single-float and N32/N64
ABI properly work.

Right now when both options are enabled the compiler chooses an
incorrect ABI and in some cases even generates wrong instructions.

The floating point behavior on MIPS is controlled through 3 flags:
soft-float, single-float, fp64. This makes things complicated because
fp64 indicates the presence of 64bit floating point registers, but
cannot be easily disabled (the mips3 feature require it, but mips3 CPUs
with only 32bit floating point exist). Also if fp64 is missing it
doesn't actually disable 64bit floating point operations, because
certain MIPS1/2 CPUs support 64bit floating point with 32bit registers,
hence the single-float option.

I'm guessing that originally single-float was only intended for the
latter case, and that's the reason why it doesn't properly work on 64bit
targets.

So this patch does the following:
- Make single-float a "master disable", even if fp64 is enabled this
should completely disable generation of 64bit floating point operations,
making it available on targets which hard require fp64.
- Add proper calling conventions for N32/N64 single-float combinations.
- Fixup codegen to not generate certain 64bit floating point operations,
apparently not assigning a register class to f64 values is not enough to
prevent them from showing up.
- Add tests for the new calling conventions and codegen.

Added: 
    llvm/test/CodeGen/Mips/cconv/arguments-hard-single-float-varargs.ll
    llvm/test/CodeGen/Mips/cconv/arguments-hard-single-float.ll
    llvm/test/CodeGen/Mips/cconv/arguments-hard-single-fp128.ll
    llvm/test/CodeGen/Mips/cconv/callee-saved-singlefloat.ll
    llvm/test/CodeGen/Mips/cconv/return-hard-single-float.ll
    llvm/test/CodeGen/Mips/cconv/return-hard-single-fp128.ll
    llvm/test/CodeGen/Mips/inlineasm-constraints-singlefloat.ll

Modified: 
    llvm/lib/Target/Mips/MipsCallingConv.td
    llvm/lib/Target/Mips/MipsISelLowering.cpp
    llvm/lib/Target/Mips/MipsRegisterInfo.cpp
    llvm/lib/Target/Mips/MipsSEISelLowering.cpp
    llvm/test/CodeGen/Mips/int-to-float-conversion.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Mips/MipsCallingConv.td b/llvm/lib/Target/Mips/MipsCallingConv.td
index 3501f9fbfd2e7..748162525b091 100644

--- a/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -186,7 +186,8 @@ def RetCC_MipsN : CallingConv<[
   //
   // f128 should only occur for the N64 ABI where long double is 128-bit. On
   // N32, long double is equivalent to double.
-  CCIfType<[i64], CCIfOrigArgWasF128<CCDelegateTo<RetCC_F128>>>,
+  CCIfSubtargetNot<"isSingleFloat()",
+      CCIfType<[i64], CCIfOrigArgWasF128<CCDelegateTo<RetCC_F128>>>>,
 
   // Aggregate returns are positioned at the lowest address in the slot for
   // both little and big-endian targets. When passing in registers, this
@@ -316,9 +317,10 @@ def CC_Mips_FixedArg : CallingConv<[
   //
   // f128 should only occur for the N64 ABI where long double is 128-bit. On
   // N32, long double is equivalent to double.
-  CCIfType<[i64],
-      CCIfSubtargetNot<"useSoftFloat()",
-          CCIfOrigArgWasF128<CCBitConvertToType<f64>>>>,
+  CCIfType<[i64], 
+      CCIfSubtargetNot<"isSingleFloat()",
+          CCIfSubtargetNot<"useSoftFloat()", 
+              CCIfOrigArgWasF128<CCBitConvertToType<f64>>>>>,
 
   CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_Mips_FastCC>>,
 
@@ -342,8 +344,8 @@ def CC_Mips : CallingConv<[
 // Callee-saved register lists.
 //===----------------------------------------------------------------------===//
 
-def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
-                                               (sequence "S%u", 7, 0))>;
+def CSR_O32_SingleFloat : CalleeSavedRegs<(add(sequence "F%u", 31, 20), RA, FP,
+                              (sequence "S%u", 7, 0))>;
 
 def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
                                         (sequence "S%u", 7, 0))> {
@@ -357,13 +359,19 @@ def CSR_O32_FP64 :
   CalleeSavedRegs<(add (decimate (sequence "D%u_64", 30, 20), 2), RA, FP,
                        (sequence "S%u", 7, 0))>;
 
-def CSR_N32 : CalleeSavedRegs<(add D20_64, D22_64, D24_64, D26_64, D28_64,
-                                   D30_64, RA_64, FP_64, GP_64,
-                                   (sequence "S%u_64", 7, 0))>;
+def CSR_N32 : CalleeSavedRegs<(add(decimate(sequence "D%u_64", 30, 20), 2),
+                  RA_64, FP_64, GP_64, (sequence "S%u_64", 7, 0))>;
+
+def CSR_N32_SingleFloat
+    : CalleeSavedRegs<(add(decimate(sequence "F%u", 30, 20), 2), RA_64, FP_64,
+          GP_64, (sequence "S%u_64", 7, 0))>;
 
 def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64,
                                    GP_64, (sequence "S%u_64", 7, 0))>;
 
+def CSR_N64_SingleFloat : CalleeSavedRegs<(add(sequence "F%u", 31, 24), RA_64,
+                              FP_64, GP_64, (sequence "S%u_64", 7, 0))>;
+
 def CSR_Mips16RetHelper :
   CalleeSavedRegs<(add V0, V1, FP,
                    (sequence "A%u", 3, 0), (sequence "S%u", 7, 0),

diff  --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 1491300e37d3e..b05de49d8332a 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -4265,10 +4265,16 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
     return std::make_pair(0U, nullptr);
 
   if (Prefix == "$f") { // Parse $f0-$f31.
-    // If the size of FP registers is 64-bit or Reg is an even number, select
-    // the 64-bit register class. Otherwise, select the 32-bit register class.
-    if (VT == MVT::Other)
-      VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
+    // If the targets is single float only, always select 32-bit registers,
+    // otherwise if the size of FP registers is 64-bit or Reg is an even number,
+    // select the 64-bit register class. Otherwise, select the 32-bit register
+    // class.
+    if (VT == MVT::Other) {
+      if (Subtarget.isSingleFloat())
+        VT = MVT::f32;
+      else
+        VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
+    }
 
     RC = getRegClassFor(VT);
 
@@ -4308,10 +4314,12 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
           return std::make_pair(0U, &Mips::CPU16RegsRegClass);
         return std::make_pair(0U, &Mips::GPR32RegClass);
       }
-      if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat())) &&
+      if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat()) ||
+           (VT == MVT::f64 && Subtarget.isSingleFloat())) &&
           !Subtarget.isGP64bit())
         return std::make_pair(0U, &Mips::GPR32RegClass);
-      if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat())) &&
+      if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat()) ||
+           (VT == MVT::f64 && Subtarget.isSingleFloat())) &&
           Subtarget.isGP64bit())
         return std::make_pair(0U, &Mips::GPR64RegClass);
       // This will generate an error message

diff  --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 6f8d6764e77b8..6ca587b1ba4d5 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -89,14 +89,25 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
                                      : CSR_Interrupt_32_SaveList;
   }
 
-  if (Subtarget.isSingleFloat())
-    return CSR_SingleFloatOnly_SaveList;
+  // N64 ABI
+  if (Subtarget.isABI_N64()) {
+    if (Subtarget.isSingleFloat())
+      return CSR_N64_SingleFloat_SaveList;
 
-  if (Subtarget.isABI_N64())
     return CSR_N64_SaveList;
+  }
+
+  // N32 ABI
+  if (Subtarget.isABI_N32()) {
+    if (Subtarget.isSingleFloat())
+      return CSR_N32_SingleFloat_SaveList;
 
-  if (Subtarget.isABI_N32())
     return CSR_N32_SaveList;
+  }
+
+  // O32 ABI
+  if (Subtarget.isSingleFloat())
+    return CSR_O32_SingleFloat_SaveList;
 
   if (Subtarget.isFP64bit())
     return CSR_O32_FP64_SaveList;
@@ -111,14 +122,25 @@ const uint32_t *
 MipsRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID) const {
   const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
-  if (Subtarget.isSingleFloat())
-    return CSR_SingleFloatOnly_RegMask;
+  // N64 ABI
+  if (Subtarget.isABI_N64()) {
+    if (Subtarget.isSingleFloat())
+      return CSR_N64_SingleFloat_RegMask;
 
-  if (Subtarget.isABI_N64())
     return CSR_N64_RegMask;
+  }
+
+  // N32 ABI
+  if (Subtarget.isABI_N32()) {
+    if (Subtarget.isSingleFloat())
+      return CSR_N32_SingleFloat_RegMask;
 
-  if (Subtarget.isABI_N32())
     return CSR_N32_RegMask;
+  }
+
+  // O32 ABI
+  if (Subtarget.isSingleFloat())
+    return CSR_O32_SingleFloat_RegMask;
 
   if (Subtarget.isFP64bit())
     return CSR_O32_FP64_RegMask;

diff  --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 71a70d9c2dd46..19917f3650bb5 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGenTypes/MachineValueType.h"
@@ -211,6 +212,16 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
     }
   }
 
+  // Targets with 64bits integer registers, but no 64bit floating point register
+  // do not support conversion between them
+  if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
+      !Subtarget.useSoftFloat()) {
+    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
+    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+  }
+
   setOperationAction(ISD::SMUL_LOHI,          MVT::i32, Custom);
   setOperationAction(ISD::UMUL_LOHI,          MVT::i32, Custom);
   setOperationAction(ISD::MULHS,              MVT::i32, Custom);

diff  --git a/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-float-varargs.ll b/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-float-varargs.ll
new file mode 100644
index 0000000000000..8cbc879310f61
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-float-varargs.ll
@@ -0,0 +1,148 @@
+; RUN: llc -mtriple=mips -relocation-model=static -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32,O32 %s
+; RUN: llc -mtriple=mipsel -relocation-model=static -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32,O32 %s
+
+; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n32 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32,N32,NEW,NEWBE %s
+; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n32 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32,N32,NEW,NEWLE %s
+
+; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n64 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM64,N64,NEW,NEWBE %s
+; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n64 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM64,N64,NEW,NEWLE %s
+
+ at floats = global [11 x float] zeroinitializer
+ at doubles = global [11 x double] zeroinitializer
+
+define void @double_args(double %a, ...)
+                         nounwind {
+entry:
+        %0 = getelementptr [11 x double], ptr @doubles, i32 0, i32 1
+        store volatile double %a, ptr %0
+
+        %ap = alloca ptr
+        call void @llvm.va_start(ptr %ap)
+        %b = va_arg ptr %ap, double
+        %1 = getelementptr [11 x double], ptr @doubles, i32 0, i32 2
+        store volatile double %b, ptr %1
+        call void @llvm.va_end(ptr %ap)
+        ret void
+}
+
+; ALL-LABEL: double_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG:         daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+
+; O32 forbids using floating point registers for the non-variable portion.
+; N32/N64 allow it.
+; O32-DAG:           sw $4, 8([[R2]])
+; O32-DAG:           sw $5, 12([[R2]])
+; NEW-DAG:           sd $4, 8([[R2]])
+
+; The varargs portion is dumped to stack
+; O32-DAG:           sw $6, 16($sp)
+; O32-DAG:           sw $7, 20($sp)
+; NEW-DAG:           sd $5, 8($sp)
+; NEW-DAG:           sd $6, 16($sp)
+; NEW-DAG:           sd $7, 24($sp)
+; NEW-DAG:           sd $8, 32($sp)
+; NEW-DAG:           sd $9, 40($sp)
+; NEW-DAG:           sd $10, 48($sp)
+; NEW-DAG:           sd $11, 56($sp)
+
+; Get the varargs pointer
+; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and 8 bytes reserved
+; for arguments 1 and 2.
+; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
+; O32-DAG:           addiu [[VAPTR:\$[0-9]+]], $sp, 16
+; O32-DAG:           sw [[VAPTR]], 4($sp)
+; N32-DAG:           addiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N32-DAG:           sw [[VAPTR]], 4($sp)
+; N64-DAG:           daddiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N64-DAG:           sd [[VAPTR]], 0($sp)
+
+; Increment the pointer then get the varargs arg
+; LLVM will rebind the load to the stack pointer instead of the varargs pointer
+; during lowering. This is fine and doesn't change the behaviour.
+; O32-DAG:           addiu [[VAPTR]], [[VAPTR]], 8
+; N32-DAG:           addiu [[VAPTR]], [[VAPTR]], 8
+; N64-DAG:           daddiu [[VAPTR]], [[VAPTR]], 8
+; O32-DAG:           lw	[[R3:\$[0-9]+]], 16($sp)
+; O32-DAG:           lw	[[R4:\$[0-9]+]], 20($sp)
+; O32-DAG:           sw [[R3]], 16([[R2]])
+; O32-DAG:           sw [[R4]], 20([[R2]])
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 8($sp)
+; NEW-DAG:           sd [[R3]], 16([[R2]])
+
+define void @float_args(float %a, ...) nounwind {
+entry:
+        %0 = getelementptr [11 x float], ptr @floats, i32 0, i32 1
+        store volatile float %a, ptr %0
+
+        %ap = alloca ptr
+        call void @llvm.va_start(ptr %ap)
+        %b = va_arg ptr %ap, float
+        %1 = getelementptr [11 x float], ptr @floats, i32 0, i32 2
+        store volatile float %b, ptr %1
+        call void @llvm.va_end(ptr %ap)
+        ret void
+}
+
+; ALL-LABEL: float_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG:         daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+
+; The first four arguments are the same in O32/N32/N64.
+; The non-variable portion should be unaffected.
+; O32-DAG:           mtc1 $4, $f0
+; O32-DAG:           swc1 $f0, 4([[R2]])
+; NEW-DAG:           swc1 $f12, 4([[R2]])
+
+; The varargs portion is dumped to stack
+; O32-DAG:           sw $5, 12($sp)
+; O32-DAG:           sw $6, 16($sp)
+; O32-DAG:           sw $7, 20($sp)
+; NEW-DAG:           sd $5, 8($sp)
+; NEW-DAG:           sd $6, 16($sp)
+; NEW-DAG:           sd $7, 24($sp)
+; NEW-DAG:           sd $8, 32($sp)
+; NEW-DAG:           sd $9, 40($sp)
+; NEW-DAG:           sd $10, 48($sp)
+; NEW-DAG:           sd $11, 56($sp)
+
+; Get the varargs pointer
+; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and should have 8
+; bytes reserved for arguments 1 and 2 (the first float arg) but as discussed in
+; arguments-float.ll, GCC doesn't agree with MD00305 and treats floats as 4
+; bytes so we only have 12 bytes total.
+; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
+; O32-DAG:           addiu [[VAPTR:\$[0-9]+]], $sp, 12
+; O32-DAG:           sw [[VAPTR]], 4($sp)
+; N32-DAG:           addiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N32-DAG:           sw [[VAPTR]], 4($sp)
+; N64-DAG:           daddiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N64-DAG:           sd [[VAPTR]], 0($sp)
+
+; Increment the pointer then get the varargs arg
+; LLVM will rebind the load to the stack pointer instead of the varargs pointer
+; during lowering. This is fine and doesn't change the behaviour.
+; Also, in big-endian mode the offset must be increased by 4 to retrieve the
+; correct half of the argument slot.
+;
+; O32-DAG:           addiu [[VAPTR]], [[VAPTR]], 4
+; N32-DAG:           addiu [[VAPTR]], [[VAPTR]], 8
+; N64-DAG:           daddiu [[VAPTR]], [[VAPTR]], 8
+; O32-DAG:           lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
+; NEWLE-DAG:         lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; NEWBE-DAG:         lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
+; ALL-DAG:           swc1 [[FTMP1]], 8([[R2]])
+
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_copy(ptr, ptr)
+declare void @llvm.va_end(ptr)

diff  --git a/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-float.ll b/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-float.ll
new file mode 100644
index 0000000000000..6b7ad03c8e1c2
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-float.ll
@@ -0,0 +1,224 @@
+; RUN: llc -mtriple=mips -relocation-model=static -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32,O32 %s
+; RUN: llc -mtriple=mipsel -relocation-model=static -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32,O32 %s
+
+; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n32 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32,NEW %s
+; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n32 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32,NEW %s
+
+; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n64 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM64,NEW %s
+; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n64 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM64,NEW %s
+
+ at bytes = global [11 x i8] zeroinitializer
+ at dwords = global [11 x i64] zeroinitializer
+ at floats = global [11 x float] zeroinitializer
+ at doubles = global [11 x double] zeroinitializer
+
+define void @double_args(double %a, double %b, double %c, double %d, double %e,
+                         double %f, double %g, double %h, double %i) nounwind {
+entry:
+        %0 = getelementptr [11 x double], ptr @doubles, i32 0, i32 1
+        store volatile double %a, ptr %0
+        %1 = getelementptr [11 x double], ptr @doubles, i32 0, i32 2
+        store volatile double %b, ptr %1
+        %2 = getelementptr [11 x double], ptr @doubles, i32 0, i32 3
+        store volatile double %c, ptr %2
+        %3 = getelementptr [11 x double], ptr @doubles, i32 0, i32 4
+        store volatile double %d, ptr %3
+        %4 = getelementptr [11 x double], ptr @doubles, i32 0, i32 5
+        store volatile double %e, ptr %4
+        %5 = getelementptr [11 x double], ptr @doubles, i32 0, i32 6
+        store volatile double %f, ptr %5
+        %6 = getelementptr [11 x double], ptr @doubles, i32 0, i32 7
+        store volatile double %g, ptr %6
+        %7 = getelementptr [11 x double], ptr @doubles, i32 0, i32 8
+        store volatile double %h, ptr %7
+        %8 = getelementptr [11 x double], ptr @doubles, i32 0, i32 9
+        store volatile double %i, ptr %8
+        ret void
+}
+
+; ALL-LABEL: double_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG:         daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument is floating point but single-float is enabled so floating
+; point registers are not used.
+; O32-DAG:           sw $4, 8([[R2]])
+; O32-DAG:           sw $5, 12([[R2]])
+; NEW-DAG:           sd $4, 8([[R2]])
+
+; O32-DAG:           sw $6, 16([[R2]])
+; O32-DAG:           sw $7, 20([[R2]])
+; NEW-DAG:           sd $5, 16([[R2]])
+
+; O32 has run out of argument registers and starts using the stack
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 16($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 20($sp)
+; O32-DAG:           sw [[R3]], 24([[R2]])
+; O32-DAG:           sw [[R4]], 28([[R2]])
+; NEW-DAG:           sd $6, 24([[R2]])
+
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 24($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 28($sp)
+; O32-DAG:           sw [[R3]], 32([[R2]])
+; O32-DAG:           sw [[R4]], 36([[R2]])
+; NEW-DAG:           sd $7, 32([[R2]])
+
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 32($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 36($sp)
+; O32-DAG:           sw [[R3]], 40([[R2]])
+; O32-DAG:           sw [[R4]], 44([[R2]])
+; NEW-DAG:           sd $8, 40([[R2]])
+
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 40($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 44($sp)
+; O32-DAG:           sw [[R3]], 48([[R2]])
+; O32-DAG:           sw [[R4]], 52([[R2]])
+; NEW-DAG:           sd $9, 48([[R2]])
+
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 48($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 52($sp)
+; O32-DAG:           sw [[R3]], 56([[R2]])
+; O32-DAG:           sw [[R4]], 60([[R2]])
+; NEW-DAG:           sd $10, 56([[R2]])
+
+; N32/N64 have run out of registers and starts using the stack too
+; O32-DAG:           lw [[R3:\$[0-9]+]], 56($sp)
+; O32-DAG:           lw [[R4:\$[0-9]+]], 60($sp)
+; O32-DAG:           sw [[R3]], 64([[R2]])
+; O32-DAG:           sw [[R4]], 68([[R2]])
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           sd $11, 64([[R2]])
+
+define void @float_args(float %a, float %b, float %c, float %d, float %e,
+                        float %f, float %g, float %h, float %i) nounwind {
+entry:
+        %0 = getelementptr [11 x float], ptr @floats, i32 0, i32 1
+        store volatile float %a, ptr %0
+        %1 = getelementptr [11 x float], ptr @floats, i32 0, i32 2
+        store volatile float %b, ptr %1
+        %2 = getelementptr [11 x float], ptr @floats, i32 0, i32 3
+        store volatile float %c, ptr %2
+        %3 = getelementptr [11 x float], ptr @floats, i32 0, i32 4
+        store volatile float %d, ptr %3
+        %4 = getelementptr [11 x float], ptr @floats, i32 0, i32 5
+        store volatile float %e, ptr %4
+        %5 = getelementptr [11 x float], ptr @floats, i32 0, i32 6
+        store volatile float %f, ptr %5
+        %6 = getelementptr [11 x float], ptr @floats, i32 0, i32 7
+        store volatile float %g, ptr %6
+        %7 = getelementptr [11 x float], ptr @floats, i32 0, i32 8
+        store volatile float %h, ptr %7
+        %8 = getelementptr [11 x float], ptr @floats, i32 0, i32 9
+        store volatile float %i, ptr %8
+        ret void
+}
+
+; ALL-LABEL: float_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG:         daddiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+
+; The first argument is floating point so floating point registers are used.
+; The first argument is the same for O32/N32/N64 but the second argument 
diff ers
+; by register
+; ALL-DAG:           swc1 $f12, 4([[R1]])
+; O32-DAG:           swc1 $f14, 8([[R1]])
+; NEW-DAG:           swc1 $f13, 8([[R1]])
+
+; O32 has run out of argument registers and (in theory) starts using the stack
+; I've yet to find a reference in the documentation about this but GCC uses up
+; the remaining two argument slots in the GPR's first. We'll do the same for
+; compatibility.
+; O32-DAG:           mtc1 $6, $f0
+; O32-DAG:           swc1 $f0, 12([[R1]])
+; NEW-DAG:           swc1 $f14, 12([[R1]])
+; O32-DAG:           mtc1 $7, $f0
+; O32-DAG:           swc1 $f0, 16([[R1]])
+; NEW-DAG:           swc1 $f15, 16([[R1]])
+
+; O32 is definitely out of registers now and switches to the stack.
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 16($sp)
+; O32-DAG:           swc1 [[F1]], 20([[R1]])
+; NEW-DAG:           swc1 $f16, 20([[R1]])
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 20($sp)
+; O32-DAG:           swc1 [[F1]], 24([[R1]])
+; NEW-DAG:           swc1 $f17, 24([[R1]])
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 24($sp)
+; O32-DAG:           swc1 [[F1]], 28([[R1]])
+; NEW-DAG:           swc1 $f18, 28([[R1]])
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 28($sp)
+; O32-DAG:           swc1 [[F1]], 32([[R1]])
+; NEW-DAG:           swc1 $f19, 32([[R1]])
+
+; N32/N64 have run out of registers and start using the stack too
+; O32-DAG:           lwc1 [[F1:\$f[0-9]+]], 32($sp)
+; O32-DAG:           swc1 [[F1]], 36([[R1]])
+; NEW-DAG:           lwc1 [[F1:\$f[0-9]+]], 0($sp)
+; NEW-DAG:           swc1 [[F1]], 36([[R1]])
+
+
+define void @double_arg2(i8 %a, double %b) nounwind {
+entry:
+        %0 = getelementptr [11 x i8], ptr @bytes, i32 0, i32 1
+        store volatile i8 %a, ptr %0
+        %1 = getelementptr [11 x double], ptr @doubles, i32 0, i32 1
+        store volatile double %b, ptr %1
+        ret void
+}
+
+; ALL-LABEL: double_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG:         daddiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG:         daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument isn't floating point so floating point registers are not
+; used.
+; The second slot is insufficiently aligned for double on O32 so it is skipped.
+; Also, double occupies two slots on O32 and only one for N32/N64.
+; ALL-DAG:           sb $4, 1([[R1]])
+; O32-DAG:           sw $6, 8([[R2]])
+; O32-DAG:           sw $7, 12([[R2]])
+; NEW-DAG:           sd $5, 8([[R2]])
+
+define void @float_arg2(i8 %a, float %b) nounwind {
+entry:
+        %0 = getelementptr [11 x i8], ptr @bytes, i32 0, i32 1
+        store volatile i8 %a, ptr %0
+        %1 = getelementptr [11 x float], ptr @floats, i32 0, i32 1
+        store volatile float %b, ptr %1
+        ret void
+}
+
+; ALL-LABEL: float_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG:         daddiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG:         daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+
+; The first argument is the same in O32/N32/N64.
+; ALL-DAG:           sb $4, 1([[R1]])
+
+; The first argument isn't floating point so floating point registers are not
+; used in O32, but N32/N64 will still use them.
+; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
+; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
+; aligned and occupying one slot. We'll use GCC's definition.
+; O32-DAG:           mtc1 $5, $f0
+; O32-DAG:           swc1 $f0, 4([[R2]])
+; NEW-DAG:           swc1 $f13, 4([[R2]])

diff  --git a/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-fp128.ll b/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-fp128.ll
new file mode 100644
index 0000000000000..9268e37b02fb5
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-hard-single-fp128.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n32 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32 %s
+; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n32 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32 %s
+
+; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n64 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM64 %s
+; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n64 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM64 %s
+
+ at ldoubles = global [11 x fp128] zeroinitializer
+
+define void @ldouble_args(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e) nounwind {
+entry:
+        %0 = getelementptr [11 x fp128], ptr @ldoubles, i32 0, i32 1
+        store volatile fp128 %a, ptr %0
+        %1 = getelementptr [11 x fp128], ptr @ldoubles, i32 0, i32 2
+        store volatile fp128 %b, ptr %1
+        %2 = getelementptr [11 x fp128], ptr @ldoubles, i32 0, i32 3
+        store volatile fp128 %c, ptr %2
+        %3 = getelementptr [11 x fp128], ptr @ldoubles, i32 0, i32 4
+        store volatile fp128 %d, ptr %3
+        %4 = getelementptr [11 x fp128], ptr @ldoubles, i32 0, i32 5
+        store volatile fp128 %e, ptr %4
+        ret void
+}
+
+; ALL-LABEL: ldouble_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(ldoubles)
+; SYM64-DAG:         daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(ldoubles)
+
+; The first four arguments are the same in N32/N64.
+; ALL-DAG:           sd	$5, 24([[R2]])
+; ALL-DAG:           sd	$4, 16([[R2]])
+; ALL-DAG:           sd	$7, 40([[R2]])
+; ALL-DAG:           sd	$6, 32([[R2]])
+; ALL-DAG:           sd	$9, 56([[R2]])
+; ALL-DAG:           sd	$8, 48([[R2]])
+; ALL-DAG:           sd	$11, 72([[R2]])
+; ALL-DAG:           sd	$10, 64([[R2]])

diff  --git a/llvm/test/CodeGen/Mips/cconv/callee-saved-singlefloat.ll b/llvm/test/CodeGen/Mips/cconv/callee-saved-singlefloat.ll
new file mode 100644
index 0000000000000..5bf1f2c2d60da
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/callee-saved-singlefloat.ll
@@ -0,0 +1,111 @@
+; RUN: llc -mtriple=mips -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,O32 %s
+; RUN: llc -mtriple=mipsel -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,O32 %s
+
+; RUN: llc -mtriple=mips64 -target-abi n32 -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,N32 %s
+; RUN: llc -mtriple=mips64el -target-abi n32 -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,N32 %s
+; RUN: llc -mtriple=mips64 -target-abi n32 -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,ALL-INV,N32-INV %s
+; RUN: llc -mtriple=mips64el -target-abi n32 -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,ALL-INV,N32-INV %s
+
+; RUN: llc -mtriple=mips64 -target-abi n64 -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,N64 %s
+; RUN: llc -mtriple=mips64el -target-abi n64 -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,N64 %s
+; RUN: llc -mtriple=mips64 -target-abi n64 -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,ALL-INV,N64-INV %s
+; RUN: llc -mtriple=mips64el -target-abi n64 -mattr=+single-float < %s | FileCheck --check-prefixes=ALL,ALL-INV,N64-INV %s
+
+define void @fpu_clobber() nounwind {
+entry:
+        call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f13},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+        ret void
+}
+
+; ALL-LABEL: fpu_clobber:
+; ALL-INV-NOT:   swc1 $f0,
+; ALL-INV-NOT:   swc1 $f1,
+; ALL-INV-NOT:   swc1 $f2,
+; ALL-INV-NOT:   swc1 $f3,
+; ALL-INV-NOT:   swc1 $f4,
+; ALL-INV-NOT:   swc1 $f5,
+; ALL-INV-NOT:   swc1 $f6,
+; ALL-INV-NOT:   swc1 $f7,
+; ALL-INV-NOT:   swc1 $f8,
+; ALL-INV-NOT:   swc1 $f9,
+; ALL-INV-NOT:   swc1 $f10,
+; ALL-INV-NOT:   swc1 $f11,
+; ALL-INV-NOT:   swc1 $f12,
+; ALL-INV-NOT:   swc1 $f13,
+; ALL-INV-NOT:   swc1 $f14,
+; ALL-INV-NOT:   swc1 $f15,
+; ALL-INV-NOT:   swc1 $f16,
+; ALL-INV-NOT:   swc1 $f17,
+; ALL-INV-NOT:   swc1 $f18,
+; ALL-INV-NOT:   swc1 $f19,
+
+; O32:           addiu $sp, $sp, -48
+; O32-DAG:       swc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F21:\$f21]], [[OFF21:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F23:\$f23]], [[OFF23:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F25:\$f25]], [[OFF25:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F27:\$f27]], [[OFF27:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F29:\$f29]], [[OFF29:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; O32-DAG:       swc1 [[F31:\$f31]], [[OFF31:[0-9]+]]($sp)
+; O32-DAG:       lwc1 [[F20]], [[OFF20]]($sp)
+; O32-DAG:       lwc1 [[F21]], [[OFF21]]($sp)
+; O32-DAG:       lwc1 [[F22]], [[OFF22]]($sp)
+; O32-DAG:       lwc1 [[F23]], [[OFF23]]($sp)
+; O32-DAG:       lwc1 [[F24]], [[OFF24]]($sp)
+; O32-DAG:       lwc1 [[F25]], [[OFF25]]($sp)
+; O32-DAG:       lwc1 [[F26]], [[OFF26]]($sp)
+; O32-DAG:       lwc1 [[F27]], [[OFF27]]($sp)
+; O32-DAG:       lwc1 [[F28]], [[OFF28]]($sp)
+; O32-DAG:       lwc1 [[F29]], [[OFF29]]($sp)
+; O32-DAG:       lwc1 [[F30]], [[OFF30]]($sp)
+; O32-DAG:       lwc1 [[F31]], [[OFF31]]($sp)
+; O32:           addiu $sp, $sp, 48
+
+; N32:           addiu $sp, $sp, -32
+; N32-DAG:       swc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; N32-INV-NOT:   swc1 $f21,
+; N32-DAG:       swc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp)
+; N32-INV-NOT:   swc1 $f23,
+; N32-DAG:       swc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; N32-INV-NOT:   swc1 $f25,
+; N32-DAG:       swc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; N32-INV-NOT:   swc1 $f27,
+; N32-DAG:       swc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; N32-INV-NOT:   swc1 $f29,
+; N32-DAG:       swc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; N32-INV-NOT:   swc1 $f31,
+; N32-DAG:       lwc1 [[F20]], [[OFF20]]($sp)
+; N32-DAG:       lwc1 [[F22]], [[OFF22]]($sp)
+; N32-DAG:       lwc1 [[F24]], [[OFF24]]($sp)
+; N32-DAG:       lwc1 [[F26]], [[OFF26]]($sp)
+; N32-DAG:       lwc1 [[F28]], [[OFF28]]($sp)
+; N32-DAG:       lwc1 [[F30]], [[OFF30]]($sp)
+; N32:           addiu $sp, $sp, 32
+
+; N64:           addiu $sp, $sp, -32
+; N64-INV-NOT:   swc1 $f20,
+; N64-INV-NOT:   swc1 $f21,
+; N64-INV-NOT:   swc1 $f22,
+; N64-INV-NOT:   swc1 $f23,
+; N64-DAG:       swc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; N64-DAG:       swc1 [[F25:\$f25]], [[OFF25:[0-9]+]]($sp)
+; N64-DAG:       swc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; N64-DAG:       swc1 [[F27:\$f27]], [[OFF27:[0-9]+]]($sp)
+; N64-DAG:       swc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; N64-DAG:       swc1 [[F29:\$f29]], [[OFF29:[0-9]+]]($sp)
+; N64-DAG:       swc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; N64-DAG:       swc1 [[F31:\$f31]], [[OFF31:[0-9]+]]($sp)
+; N64-DAG:       lwc1 [[F24]], [[OFF24]]($sp)
+; N64-DAG:       lwc1 [[F25]], [[OFF25]]($sp)
+; N64-DAG:       lwc1 [[F26]], [[OFF26]]($sp)
+; N64-DAG:       lwc1 [[F27]], [[OFF27]]($sp)
+; N64-DAG:       lwc1 [[F28]], [[OFF28]]($sp)
+; N64-DAG:       lwc1 [[F29]], [[OFF29]]($sp)
+; N64-DAG:       lwc1 [[F30]], [[OFF30]]($sp)
+; N64-DAG:       lwc1 [[F31]], [[OFF31]]($sp)
+; N64:           addiu $sp, $sp, 32
\ No newline at end of file

diff  --git a/llvm/test/CodeGen/Mips/cconv/return-hard-single-float.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-single-float.ll
new file mode 100644
index 0000000000000..1abf08d8200fb
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-single-float.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mattr=+single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,O32 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static -mattr=+single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n32 -mattr=+single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n32 -mattr=+single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n64 -mattr=+single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n64 -mattr=+single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,N64 %s
+
+ at float = global float zeroinitializer
+ at double = global double zeroinitializer
+
+define float @retfloat() nounwind {
+entry:
+        %0 = load volatile float, ptr @float
+        ret float %0
+}
+
+; ALL-LABEL: retfloat:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(float)
+; O32-DAG:           lwc1 $f0, %lo(float)([[R1]])
+; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(float)
+; N32-DAG:           lwc1 $f0, %lo(float)([[R1]])
+; N64-DAG:           lwc1 $f0, %lo(float)([[R1:\$[0-9+]]])
+
+define double @retdouble() nounwind {
+entry:
+        %0 = load volatile double, ptr @double
+        ret double %0
+}
+
+; ALL-LABEL: retdouble:
+; O32-DAG:           lw $2, %lo(double)([[R1:\$[0-9]+]])
+; O32-DAG:           addiu [[R2:\$[0-9]+]], [[R1]], %lo(double)
+; O32-DAG:           lw $3, 4([[R2]])
+; N32-DAG:           ld $2, %lo(double)([[R1:\$[0-9]+]])
+; N64-DAG:           ld $2, %lo(double)([[R1:\$[0-9]+]])

diff  --git a/llvm/test/CodeGen/Mips/cconv/return-hard-single-fp128.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-single-fp128.ll
new file mode 100644
index 0000000000000..e4d04146ecc2f
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-single-fp128.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n32 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32 %s
+; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n32 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM32 %s
+
+; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n64 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM64 %s
+; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n64 -mattr=single-float < %s \
+; RUN:   | FileCheck --check-prefixes=ALL,SYM64 %s
+
+ at fp128 = global fp128 zeroinitializer
+
+define fp128 @retldouble() nounwind {
+entry:
+        %0 = load volatile fp128, ptr @fp128
+        ret fp128 %0
+}
+
+; ALL-LABEL: retldouble:
+; SYM32-DAG:         addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(fp128)
+; SYM64-DAG:         daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(fp128)
+
+; ALL-DAG:           ld $2, %lo(fp128)([[R2]])
+; ALL-DAG:           ld $3, 8([[R2]])

diff  --git a/llvm/test/CodeGen/Mips/inlineasm-constraints-singlefloat.ll b/llvm/test/CodeGen/Mips/inlineasm-constraints-singlefloat.ll
new file mode 100644
index 0000000000000..ddebddcdab260
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/inlineasm-constraints-singlefloat.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=mips -mattr=+single-float < %s | FileCheck %s --check-prefix=MIPS32
+; RUN: llc -mtriple=mips64 -mattr=+single-float < %s | FileCheck %s --check-prefix=MIPS64
+
+define void @read_double(ptr %0) {
+; MIPS32-LABEL: read_double:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $2, 4($4)
+; MIPS32-NEXT:    lw $3, 0($4)
+; MIPS32-NEXT:    #APP
+; MIPS32-NEXT:    #NO_APP
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: read_double:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    ld $2, 0($4)
+; MIPS64-NEXT:    #APP
+; MIPS64-NEXT:    #NO_APP
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+  %2 = load double, ptr %0, align 8
+  tail call void asm sideeffect "", "r,~{$1}"(double %2)
+  ret void
+}
+
+define void @read_float(ptr %0) {
+; MIPS32-LABEL: read_float:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lwc1 $f0, 0($4)
+; MIPS32-NEXT:    #APP
+; MIPS32-NEXT:    #NO_APP
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS64-LABEL: read_float:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lwc1 $f0, 0($4)
+; MIPS64-NEXT:    #APP
+; MIPS64-NEXT:    #NO_APP
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+  %2 = load float, ptr %0, align 8
+  tail call void asm sideeffect "", "f"(float %2)
+  ret void
+}
+
+; Test that a proper register class is assigned to clobbers in single-float mode
+define float @explicit_float_register_clobber(ptr %0) {
+; MIPS32-LABEL: explicit_float_register_clobber:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lwc1 $f1, 0($4)
+; MIPS32-NEXT:    #APP
+; MIPS32-NEXT:    #NO_APP
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    mov.s $f0, $f1
+;
+; MIPS64-LABEL: explicit_float_register_clobber:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lwc1 $f1, 0($4)
+; MIPS64-NEXT:    #APP
+; MIPS64-NEXT:    #NO_APP
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    mov.s $f0, $f1
+  %2 = load float, ptr %0, align 8
+  tail call void asm sideeffect "", "~{$f0}"()
+  ret float %2
+}

diff  --git a/llvm/test/CodeGen/Mips/int-to-float-conversion.ll b/llvm/test/CodeGen/Mips/int-to-float-conversion.ll
index 84bc6a253595a..1c8ad9ad07e15 100644
--- a/llvm/test/CodeGen/Mips/int-to-float-conversion.ll
+++ b/llvm/test/CodeGen/Mips/int-to-float-conversion.ll
@@ -1,13 +1,24 @@
-; RUN: llc -mtriple=mipsel < %s | FileCheck %s -check-prefix=32
-; RUN: llc -mtriple=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=64
-; RUN: llc -mtriple=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
+; RUN: llc -mtriple=mipsel < %s | FileCheck %s -check-prefixes=ALL,32,32DF
+; RUN: llc -mtriple=mipsel -mattr=+single-float < %s | FileCheck %s -check-prefixes=ALL,32,32SF
+
+; RUN: llc -mtriple=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefixes=ALL,64,64DF
+; RUN: llc -mtriple=mips64el -mcpu=mips4 -mattr=+single-float < %s \
+; RUN:   | FileCheck %s -check-prefixes=ALL,64,64SF
+
+; RUN: llc -mtriple=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefixes=ALL,64,64DF
+; RUN: llc -mtriple=mips64el -mcpu=mips64 -mattr=+single-float < %s \
+; RUN:   | FileCheck %s -check-prefixes=ALL,64,64SF
+
+; Test various combinations of 32/64bit GP registers and single/double floating point support.
 
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global ptr null, align 4
 
-; 32-LABEL: test_float_int_:
-; 32: mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
-; 32: cvt.s.w $f{{[0-9]+}}, $f[[R0]]
+; ALL-LABEL: test_float_int_:
+; 32:   mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 32:   cvt.s.w $f{{[0-9]+}}, $f[[R0]]
+; 64:   mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 64:   cvt.s.w $f{{[0-9]+}}, $f[[R0]]
 
 define float @test_float_int_(i32 %a) {
 entry:
@@ -15,12 +26,13 @@ entry:
   ret float %conv
 }
 
-; 32-LABEL: test_double_int_:
-; 32: mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
-; 32: cvt.d.w $f{{[0-9]+}}, $f[[R0]]
-; 64-LABEL: test_double_int_:
-; 64: mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
-; 64: cvt.d.w $f{{[0-9]+}}, $f[[R0]]
+; ALL-LABEL: test_double_int_:
+; 32DF: mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 32DF: cvt.d.w $f{{[0-9]+}}, $f[[R0]]
+; 32SF: jal	__floatsidf
+; 64DF: mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 64DF: cvt.d.w $f{{[0-9]+}}, $f[[R0]]
+; 64SF: jal	__floatsidf
 
 define double @test_double_int_(i32 %a) {
 entry:
@@ -28,9 +40,11 @@ entry:
   ret double %conv
 }
 
-; 64-LABEL: test_float_LL_:
-; 64: dmtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
-; 64: cvt.s.l $f{{[0-9]+}}, $f[[R0]]
+; ALL-LABEL: test_float_LL_:
+; 32:   jal __floatdisf
+; 64DF: dmtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 64DF: cvt.s.l $f{{[0-9]+}}, $f[[R0]]
+; 64SF: jal __floatdisf
 
 define float @test_float_LL_(i64 %a) {
 entry:
@@ -38,9 +52,11 @@ entry:
   ret float %conv
 }
 
-; 64-LABEL: test_double_LL_:
-; 64: dmtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
-; 64: cvt.d.l $f{{[0-9]+}}, $f[[R0]]
+; ALL-LABEL: test_double_LL_:
+; 32:   jal __floatdidf
+; 64DF: dmtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 64DF: cvt.d.l $f{{[0-9]+}}, $f[[R0]]
+; 64SF: jal __floatdidf
 
 define double @test_double_LL_(i64 %a) {
 entry: