[llvm-commits] [llvm] r79061 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/stdarg.ll
Dan Gohman
gohman at apple.com
Fri Aug 14 18:38:56 PDT 2009
Author: djg
Date: Fri Aug 14 20:38:56 2009
New Revision: 79061
URL: http://llvm.org/viewvc/llvm-project?rev=79061&view=rev
Log:
On x86-64, for a varargs function, don't store the xmm registers to
the register save area if %al is 0. This avoids touching xmm
regsiters when they aren't actually used.
Added:
llvm/trunk/test/CodeGen/X86/stdarg.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrInfo.td
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=79061&r1=79060&r2=79061&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 14 20:38:56 2009
@@ -1527,37 +1527,44 @@
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(VarArgsGPOffset));
+ unsigned Offset = VarArgsGPOffset;
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+ Offset);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getIntPtrConstant(8));
+ Offset += 8;
}
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+
// Now store the XMM (fp + vector) parameter registers.
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(VarArgsFPOffset));
+ SmallVector<SDValue, 11> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+ SaveXMMOps.push_back(ALVal);
+
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset));
+
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
X86::VR128RegisterClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getIntPtrConstant(16));
+ SaveXMMOps.push_back(Val);
}
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, MVT::Other,
+ &SaveXMMOps[0], SaveXMMOps.size());
}
}
@@ -7090,6 +7097,7 @@
case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
}
}
@@ -7513,7 +7521,7 @@
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
+ // Move all successors of thisMBB to nextMBB
nextMBB->transferSuccessors(thisMBB);
// Update thisMBB to fall through to newMBB
@@ -7585,6 +7593,73 @@
return nextMBB;
}
+MachineBasicBlock *
+X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit code to save XMM registers to the stack. The ABI says that the
+ // number of registers to save is given in %al, so it's theoretically
+ // possible to do an indirect jump trick to avoid saving all of them,
+ // however this code takes a simpler approach and just executes all
+ // of the stores if %al is non-zero. It's less code, and it's probably
+ // easier on the hardware branch predictor, and stores aren't all that
+ // expensive anyway.
+
+ // Create the new basic blocks. One block contains all the XMM stores,
+ // and one block is the final destination regardless of whether any
+ // stores were performed.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *F = MBB->getParent();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+ MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, XMMSaveMBB);
+ F->insert(MBBIter, EndMBB);
+
+ // Set up the CFG.
+ // Move any original successors of MBB to the end block.
+ EndMBB->transferSuccessors(MBB);
+ // The original block will now fall through to the XMM save block.
+ MBB->addSuccessor(XMMSaveMBB);
+ // The XMMSaveMBB will fall through to the end block.
+ XMMSaveMBB->addSuccessor(EndMBB);
+
+ // Now add the instructions.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned CountReg = MI->getOperand(0).getReg();
+ int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
+ int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
+
+ if (!Subtarget->isTargetWin64()) {
+ // If %al is 0, branch around the XMM save block.
+ BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
+ BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ }
+
+ // In the XMM save block, save all the XMM argument registers.
+ for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
+ BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
+ .addFrameIndex(RegSaveFrameIndex)
+ .addImm(/*Scale=*/1)
+ .addReg(/*IndexReg=*/0)
+ .addImm(/*Disp=*/Offset)
+ .addReg(/*Segment=*/0)
+ .addReg(MI->getOperand(i).getReg())
+ .addMemOperand(MachineMemOperand(
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+ MachineMemOperand::MOStore, Offset,
+ /*Size=*/16, /*Align=*/16));
+ }
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+
+ return EndMBB;
+}
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -7888,6 +7963,8 @@
X86::MOV32rr, X86::MOV32rr,
X86::MOV32ri, X86::MOV32ri,
false);
+ case X86::VASTART_SAVE_XMM_REGS:
+ return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
}
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=79061&r1=79060&r2=79061&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Aug 14 20:38:56 2009
@@ -243,7 +243,12 @@
MUL_IMM,
// PTEST - Vector bitwise comparisons
- PTEST
+ PTEST,
+
+ // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
+ // according to %al. An operator is needed so that this can be expanded
+ // with control flow.
+ VASTART_SAVE_XMM_REGS
};
}
@@ -715,6 +720,11 @@
MachineBasicBlock *BB,
unsigned cmovOpc) const;
+ /// Utility function to emit the xmm reg save portion of va_start.
+ MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=79061&r1=79060&r2=79061&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri Aug 14 20:38:56 2009
@@ -56,6 +56,10 @@
def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+ SDTCisVT<1, iPTR>,
+ SDTCisVT<2, iPTR>]>;
+
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
@@ -114,6 +118,11 @@
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInFlag]>;
+def X86vastart_save_xmm_regs :
+ SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+ SDT_X86VASTART_SAVE_XMM_REGS,
+ [SDNPHasChain]>;
+
def X86callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
[SDNPHasChain, SDNPOutFlag]>;
@@ -511,6 +520,18 @@
Requires<[In32BitMode]>;
}
+// x86-64 va_start lowering magic.
+let usesCustomDAGSchedInserter = 1 in
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
// Nop
let neverHasSideEffects = 1 in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
Added: llvm/trunk/test/CodeGen/X86/stdarg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stdarg.ll?rev=79061&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stdarg.ll (added)
+++ llvm/trunk/test/CodeGen/X86/stdarg.ll Fri Aug 14 20:38:56 2009
@@ -0,0 +1,20 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {testb \[%\]al, \[%\]al}
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define void @foo(i32 %x, ...) nounwind {
+entry:
+ %ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
+ %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
+ call void @llvm.va_start(i8* %ap12)
+ %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
+ call void @bar(%struct.__va_list_tag* %ap3) nounwind
+ call void @llvm.va_end(i8* %ap12)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @bar(%struct.__va_list_tag*)
+
+declare void @llvm.va_end(i8*) nounwind
More information about the llvm-commits
mailing list