This patch defines two pseudo instructions that are expanded to a pair of mtc1 or mfc1 instructions after register allocation.<br><br>Previously,
f64 arguments passed in integer registers were read and written via
load and store instructions (2 i32 stores and 1 f64 load were needed
when reading a double value and 1 f64 store and 2 i32 loads were needed
when writing one). After applying this patch, mtc1 or mfc1 instructions,
which transfer values directly between the integer and floating-point
register files, are generated instead.<br>
<br>Index: test/CodeGen/Mips/buildpairextractelementf64.ll<br>===================================================================<br>--- test/CodeGen/Mips/buildpairextractelementf64.ll (revision 0)<br>+++ test/CodeGen/Mips/buildpairextractelementf64.ll (revision 0)<br>
@@ -0,0 +1,27 @@<br>+; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL<br>+; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB<br>+@a = external global i32<br>+<br>+define double @f(i32 %a1, double %d) nounwind {<br>
+entry:<br>+; CHECK-EL: mtc1 $6, $f12<br>+; CHECK-EL: mtc1 $7, $f13<br>+; CHECK-EB: mtc1 $7, $f12<br>+; CHECK-EB: mtc1 $6, $f13<br>+ store i32 %a1, i32* @a, align 4<br>+ %add = fadd double %d, 2.000000e+00<br>+ ret double %add<br>
+}<br>+<br>+define void @f3(double %d, i32 %a1) nounwind {<br>+entry:<br>+; CHECK-EL: mfc1 ${{[0-9]+}}, $f12<br>+; CHECK-EL: mfc1 $7, $f13<br>+; CHECK-EB: mfc1 ${{[0-9]+}}, $f13<br>+; CHECK-EB: mfc1 $7, $f12<br>+ tail call void @f2(i32 %a1, double %d) nounwind<br>
+ ret void<br>+}<br>+<br>+declare void @f2(i32, double)<br>+<br>Index: lib/Target/Mips/MipsTargetMachine.cpp<br>===================================================================<br>--- lib/Target/Mips/MipsTargetMachine.cpp (revision 129116)<br>
+++ lib/Target/Mips/MipsTargetMachine.cpp (working copy)<br>@@ -75,3 +75,9 @@<br> PM.add(createMipsDelaySlotFillerPass(*this));<br> return true;<br> }<br>+<br>+bool MipsTargetMachine::<br>+addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {<br>
+ PM.add(createMipsExpandPseudoInstrsPass(*this));<br>+ return true;<br>+}<br>Index: lib/Target/Mips/Mips.h<br>===================================================================<br>--- lib/Target/Mips/Mips.h (revision 129116)<br>
+++ lib/Target/Mips/Mips.h (working copy)<br>@@ -25,7 +25,7 @@<br> <br> FunctionPass *createMipsISelDag(MipsTargetMachine &TM);<br> FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);<br>-<br>
+ FunctionPass *createMipsExpandPseudoInstrsPass(MipsTargetMachine &TM);<br> extern Target TheMipsTarget;<br> extern Target TheMipselTarget;<br> <br>Index: lib/Target/Mips/MipsISelLowering.h<br>===================================================================<br>
--- lib/Target/Mips/MipsISelLowering.h (revision 129116)<br>+++ lib/Target/Mips/MipsISelLowering.h (working copy)<br>@@ -64,7 +64,10 @@<br> <br> // DivRem(u)<br> DivRem,<br>- DivRemU<br>+ DivRemU,<br>
+<br>+ BuildPairF64,<br>+ ExtractElementF64<br> };<br> }<br> <br>Index: lib/Target/Mips/MipsTargetMachine.h<br>===================================================================<br>--- lib/Target/Mips/MipsTargetMachine.h (revision 129116)<br>
+++ lib/Target/Mips/MipsTargetMachine.h (working copy)<br>@@ -63,6 +63,7 @@<br> CodeGenOpt::Level OptLevel);<br> virtual bool addPreEmitPass(PassManagerBase &PM,<br> CodeGenOpt::Level OptLevel);<br>
+ virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);<br> };<br> <br> /// MipselTargetMachine - Mipsel target machine.<br>Index: lib/Target/Mips/MipsInstrFPU.td<br>===================================================================<br>
--- lib/Target/Mips/MipsInstrFPU.td (revision 129116)<br>+++ lib/Target/Mips/MipsInstrFPU.td (working copy)<br>@@ -30,6 +30,12 @@<br> SDTCisInt<2>]>;<br> def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,<br>
SDTCisSameAs<1, 2>]>;<br>+def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,<br>+ SDTCisVT<1, i32>,<br>
+ SDTCisSameAs<1, 2>]>;<br>+def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,<br>+ SDTCisVT<1, f64>,<br>
+ SDTCisVT<2, i32>]>;<br> <br> def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;<br> def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;<br>
@@ -37,6 +43,9 @@<br> def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;<br> def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,<br> [SDNPHasChain, SDNPOptInGlue]>;<br>
+def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;<br>+def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64",<br>+ SDT_MipsExtractElementF64>;<br>
<br> // Operand for printing out a condition code.<br> let PrintMethod = "printFCCOperand" in<br>@@ -312,6 +321,21 @@<br> def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),<br> "# MOVCCRToCCR", []>;<br>
<br>+// This pseudo instr gets expanded to 2 mtc1 instrs after register allocation.<br>+def BuildPairF64 :<br>+ MipsPseudo<(outs AFGR64:$dst),<br>+ (ins CPURegs:$lo, CPURegs:$hi), "buildpair64\t$dst, $lo, $hi",<br>
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;<br>+<br>+// This pseudo instr gets expanded to an mfc1 instr after register allocation.<br>+// if n is 0, lower part of src is extracted.<br>
+// if n is 1, higher part of src is extracted.<br>+def hiorlo : Operand<i32>;<br>+def ExtractElementF64 :<br>+ MipsPseudo<(outs CPURegs:$dst),<br>+ (ins AFGR64:$src, hiorlo:$n), "extractelement64\t$dst, $src, $n",<br>
+ [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;<br>+<br> //===----------------------------------------------------------------------===//<br> // Floating Point Patterns<br> //===----------------------------------------------------------------------===//<br>
Index: lib/Target/Mips/MipsExpandPseudoInstrs.cpp<br>===================================================================<br>--- lib/Target/Mips/MipsExpandPseudoInstrs.cpp (revision 0)<br>+++ lib/Target/Mips/MipsExpandPseudoInstrs.cpp (revision 0)<br>
@@ -0,0 +1,116 @@<br>+//===-- MipsExpandPseudoInstrs.cpp - Expand pseudo instructions -----------===//<br>+//<br>+// The LLVM Compiler Infrastructure<br>+//<br>+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>+//<br>+//===----------------------------------------------------------------------===//<br>+//<br>+// Pass run after register allocation that expands pseudo<br>+// instructions into mtc1 or mfc1 instructions.<br>
+//<br>+//===----------------------------------------------------------------------===//<br>+<br>+#define DEBUG_TYPE "expand-pseudo-instrs"<br>+<br>+#include "Mips.h"<br>+#include "MipsTargetMachine.h"<br>
+#include "llvm/CodeGen/MachineFunctionPass.h"<br>+#include "llvm/CodeGen/MachineInstrBuilder.h"<br>+#include "llvm/Target/TargetInstrInfo.h"<br>+#include "llvm/ADT/Statistic.h"<br>
+<br>+using namespace llvm;<br>+<br>+namespace {<br>+ struct ExpandPseudoInstrs : public MachineFunctionPass {<br>+<br>+ TargetMachine &TM;<br>+ const TargetInstrInfo *TII;<br>+<br>+ static char ID;<br>+ ExpandPseudoInstrs(TargetMachine &tm)<br>
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }<br>+<br>+ virtual const char *getPassName() const {<br>+ return "Mips PseudoInstrs Expansion";<br>+ }<br>+<br>+ bool runOnMachineFunction(MachineFunction &F);<br>
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);<br>+<br>+ private:<br>+ void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator);<br>+ void ExpandExtractElementF64(MachineBasicBlock&, MachineBasicBlock::iterator);<br>
+ };<br>+ char ExpandPseudoInstrs::ID = 0;<br>+} // end of anonymous namespace<br>+<br>+bool ExpandPseudoInstrs::runOnMachineFunction(MachineFunction& F) {<br>+ bool Changed = false;<br>+<br>+ for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I)<br>
+ Changed |= runOnMachineBasicBlock(*I);<br>+<br>+ return Changed;<br>+}<br>+<br>+bool ExpandPseudoInstrs::runOnMachineBasicBlock(MachineBasicBlock& MBB) {<br>+<br>+ bool Changed = false;<br>+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {<br>
+ const TargetInstrDesc& Tid = I->getDesc();<br>+<br>+ switch(Tid.getOpcode()) {<br>+ default: <br>+ ++I;<br>+ continue;<br>+ case Mips::BuildPairF64:<br>+ ExpandBuildPairF64(MBB, I);<br>
+ break;<br>+ case Mips::ExtractElementF64:<br>+ ExpandExtractElementF64(MBB, I);<br>+ break;<br>+ } <br>+<br>+ // delete original instr<br>+ MBB.erase(I++);<br>+ Changed = true;<br>+ }<br>
+<br>
+ return Changed;<br>+}<br>+<br>+void ExpandPseudoInstrs::ExpandBuildPairF64(MachineBasicBlock& MBB,<br>+ MachineBasicBlock::iterator I) { <br>+ unsigned DstReg = I->getOperand(0).getReg();<br>
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();<br>+ const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);<br>+ DebugLoc dl = I->getDebugLoc();<br>+ const unsigned* SubReg =<br>
+ TM.getRegisterInfo()->getSubRegisters(DstReg);<br>+<br>+ // mtc1 Lo, $fp<br>+ // mtc1 Hi, $fp + 1<br>+ BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg);<br>+ BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg);<br>
+}<br>+<br>+void ExpandPseudoInstrs::ExpandExtractElementF64(MachineBasicBlock& MBB,<br>+ MachineBasicBlock::iterator I) {<br>+ unsigned DstReg = I->getOperand(0).getReg();<br>
+ unsigned SrcReg = I->getOperand(1).getReg();<br>+ unsigned N = I->getOperand(2).getImm();<br>+ const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);<br>+ DebugLoc dl = I->getDebugLoc();<br>+ const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);<br>
+<br>+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));<br>+}<br>+<br>+/// createMipsExpandPseudoInstrsPass - Returns a pass that expands pseudo <br>+/// instrs into real instrs<br>+FunctionPass *llvm::createMipsExpandPseudoInstrsPass(MipsTargetMachine &tm) {<br>
+ return new ExpandPseudoInstrs(tm);<br>+}<br>Index: lib/Target/Mips/MipsISelLowering.cpp<br>===================================================================<br>--- lib/Target/Mips/MipsISelLowering.cpp (revision 129116)<br>
+++ lib/Target/Mips/MipsISelLowering.cpp (working copy)<br>@@ -13,6 +13,7 @@<br> //===----------------------------------------------------------------------===//<br> <br> #define DEBUG_TYPE "mips-lower"<br>+#include <algorithm><br>
#include "MipsISelLowering.h"<br> #include "MipsMachineFunction.h"<br> #include "MipsTargetMachine.h"<br>@@ -52,6 +53,8 @@<br> case MipsISD::MSubu : return "MipsISD::MSubu";<br>
case MipsISD::DivRem : return "MipsISD::DivRem";<br> case MipsISD::DivRemU : return "MipsISD::DivRemU";<br>+ case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";<br>
+ case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";<br> default : return NULL;<br> }<br> }<br>@@ -1132,11 +1135,12 @@<br> if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)<br>
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);<br> if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {<br>- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);<br>- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,<br>
- DAG.getConstant(0, getPointerTy()));<br>- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,<br>- DAG.getConstant(1, getPointerTy()));<br>
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, Arg,<br>+ DAG.getConstant(0, MVT::i32));<br>+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, Arg,<br>
+ DAG.getConstant(1, MVT::i32));<br>+ if (!Subtarget->isLittle())<br>+ std::swap(Lo, Hi);<br> RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));<br>
RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));<br> continue;<br>@@ -1429,9 +1433,10 @@<br> unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),<br> VA.getLocReg()+1, RC);<br>
SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);<br>- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, ArgValue, <br>- ArgValue2);<br>- ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Pair);<br>
+ if (!Subtarget->isLittle())<br>+ std::swap(ArgValue, ArgValue2);<br>+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,<br>+ ArgValue, ArgValue2);<br>
}<br> }<br> <br>