[llvm-commits] [llvm] r162725 - in /llvm/trunk: lib/Target/PowerPC/PPCISelLowering.cpp lib/Target/PowerPC/PPCISelLowering.h lib/Target/PowerPC/PPCInstrInfo.td test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
Hal Finkel
hfinkel at anl.gov
Mon Aug 27 19:10:27 PDT 2012
Author: hfinkel
Date: Mon Aug 27 21:10:27 2012
New Revision: 162725
URL: http://llvm.org/viewvc/llvm-project?rev=162725&view=rev
Log:
Eliminate redundant CR moves on PPC32.
The 32-bit ABI requires CR bit 6 to be set if the call has fp arguments and
unset if it doesn't. The solution up to now was to insert a MachineNode to
set/unset the CR bit, which produces a CR vreg. This vreg was then copied
into CR bit 6. When the register allocator saw a bunch of these in the same
function, it allocated the set/unset CR bit in some random CR register (1
extra instruction) and then emitted CR moves before every vararg function
call, rather than just setting and unsetting CR bit 6 directly before every
vararg function call. This patch instead inserts a PPCcrset/PPCcrunset
instruction which are then matched by a dedicated instruction pattern.
Patch by Tobias von Koch.
Added:
llvm/trunk/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=162725&r1=162724&r2=162725&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon Aug 27 21:10:27 2012
@@ -517,6 +517,8 @@
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::CR6SET: return "PPCISD::CR6SET";
+ case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
}
}
@@ -2834,6 +2836,10 @@
isTailCall, RegsToPass, Ops, NodeTys,
PPCSubTarget);
+ // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+ if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
@@ -3131,14 +3137,6 @@
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Set CR6 to true if this is a vararg call with floating args passed in
- // registers.
- if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
- dl, MVT::i32), 0);
- RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
- }
-
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
@@ -3148,6 +3146,14 @@
InFlag = Chain.getValue(1);
}
+ // Set CR bit 6 to true if this is a vararg call with floating args passed in
+ // registers.
+ if (isVarArg) {
+ Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
+ dl, DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+ InFlag = Chain.getValue(1);
+ }
+
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=162725&r1=162724&r2=162725&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Mon Aug 27 21:10:27 2012
@@ -174,6 +174,10 @@
/// operand #3 optional in flag
TC_RETURN,
+ /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=162725&r1=162724&r2=162725&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Mon Aug 27 21:10:27 2012
@@ -155,6 +155,12 @@
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
+// Instructions to set/unset CR bit 6 for SVR4 vararg calls
+def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
// Instructions to support atomic operations
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
[SDNPHasChain, SDNPMayLoad]>;
@@ -1145,6 +1151,16 @@
"crxor $dst, $dst, $dst", BrCR,
[]>;
+let Defs = [CR1EQ], CRD = 6 in {
+def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
+ "creqv 6, 6, 6", BrCR,
+ [(PPCcr6set)]>;
+
+def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
+ "crxor 6, 6, 6", BrCR,
+ [(PPCcr6unset)]>;
+}
+
// XFX-Form instructions. Instructions that deal with SPRs.
//
let Uses = [CTR] in {
Added: llvm/trunk/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll?rev=162725&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll Mon Aug 27 21:10:27 2012
@@ -0,0 +1,26 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux"
+
+ at .str = private unnamed_addr constant [3 x i8] c"%i\00", align 1
+
+define void @test(i32 %count) nounwind {
+entry:
+; CHECK: crxor 6, 6, 6
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
+ %cmp2 = icmp sgt i32 %count, 0
+ br i1 %cmp2, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+; CHECK: crxor 6, 6, 6
+ %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
+ %inc = add nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, %count
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
More information about the llvm-commits
mailing list