[llvm-commits] [llvm] r50075 - in /llvm/trunk: lib/Target/X86/README-X86-64.txt lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86MachineFunctionInfo.h test/CodeGen/X86/x86-64-sret-return.ll

Dan Gohman gohman at apple.com
Mon Apr 21 16:59:07 PDT 2008


Author: djg
Date: Mon Apr 21 18:59:07 2008
New Revision: 50075

URL: http://llvm.org/viewvc/llvm-project?rev=50075&view=rev
Log:
Implement an x86-64 ABI detail of passing structs by hidden first
argument. The x86-64 ABI requires the incoming value of %rdi to
be copied to %rax on exit from a function that is returning a
large C struct.

Also, add a README-X86-64 entry detailing the missed optimization
opportunity and proposing an alternative approach.

Added:
    llvm/trunk/test/CodeGen/X86/x86-64-sret-return.ll
Modified:
    llvm/trunk/lib/Target/X86/README-X86-64.txt
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h

Modified: llvm/trunk/lib/Target/X86/README-X86-64.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README-X86-64.txt?rev=50075&r1=50074&r2=50075&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/README-X86-64.txt (original)
+++ llvm/trunk/lib/Target/X86/README-X86-64.txt Mon Apr 21 18:59:07 2008
@@ -236,3 +236,24 @@
 
 //===---------------------------------------------------------------------===//
 
+The x86-64 ABI for hidden-argument struct returns requires that the
+incoming value of %rdi be copied into %rax by the callee upon return.
+
+The idea is that it saves callers from having to remember this value,
+which would often require a callee-saved register. Callees usually
+need to keep this value live for most of their body anyway, so it
+doesn't add a significant burden on them.
+
+We currently implement this in codegen; however, this is suboptimal
+because it makes it awkward to implement the matching optimization in
+callers (reusing the returned %rax instead of saving the pointer).
+
+A better implementation would be to relax the LLVM IR rules for sret
+arguments to allow a function with an sret argument to have a non-void
+return type, and to have the front-end set up the sret argument value
+as the return value of the function. The front-end could more easily
+emit uses of the returned struct value to be in terms of the function's
+lowered return value, and it would free non-C frontends from a
+complication only required by a C-based ABI.
+
+//===---------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=50075&r1=50074&r2=50075&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr 21 18:59:07 2008
@@ -875,6 +875,25 @@
     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
     Flag = Chain.getValue(1);
   }
+
+  // The x86-64 ABI for returning structs by value requires that we copy
+  // the sret argument into %rax for the return. We saved the argument into
+  // a virtual register in the entry block, so now we copy the value out
+  // and into %rax.
+  if (Subtarget->is64Bit() &&
+      DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+    unsigned Reg = FuncInfo->getSRetReturnReg();
+    if (!Reg) {
+      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+      FuncInfo->setSRetReturnReg(Reg);
+    }
+    SDOperand Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy());
+
+    Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag);
+    Flag = Chain.getValue(1);
+  }
   
   RetOps[0] = Chain;  // Update chain.
 
@@ -1225,6 +1244,21 @@
     }
   }
 
+  // The x86-64 ABI for returning structs by value requires that we copy
+  // the sret argument into %rax for the return. Save the argument into
+  // a virtual register so that we can access it from the return points.
+  if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+    unsigned Reg = FuncInfo->getSRetReturnReg();
+    if (!Reg) {
+      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+      FuncInfo->setSRetReturnReg(Reg);
+    }
+    SDOperand Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]);
+    Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root);
+  }
+
   unsigned StackSize = CCInfo.getNextStackOffset();
   // align stack specially for tail calls
   if (CC == CallingConv::Fast)

Modified: llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h?rev=50075&r1=50074&r2=50075&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h Mon Apr 21 18:59:07 2008
@@ -53,20 +53,27 @@
   /// the returnaddr can be savely move to this area
   int TailCallReturnAddrDelta;
 
+  /// SRetReturnReg - Some subtargets require that sret lowering also
+  /// return the sret pointer argument in a register. This field holds
+  /// the virtual register that argument is saved in (0 if not yet set).
+  unsigned SRetReturnReg;
+
 public:
   X86MachineFunctionInfo() : ForceFramePointer(false),
                              CalleeSavedFrameSize(0),
                              BytesToPopOnReturn(0),
                              DecorationStyle(None),
                              ReturnAddrIndex(0),
-                             TailCallReturnAddrDelta(0) {}
+                             TailCallReturnAddrDelta(0),
+                             SRetReturnReg(0) {}
   
   X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
                                                 CalleeSavedFrameSize(0),
                                                 BytesToPopOnReturn(0),
                                                 DecorationStyle(None),
                                                 ReturnAddrIndex(0),
-                                                TailCallReturnAddrDelta(0) {}
+                                                TailCallReturnAddrDelta(0),
+                                                SRetReturnReg(0) {}
   
   bool getForceFramePointer() const { return ForceFramePointer;} 
   void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@@ -85,6 +92,9 @@
 
   int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
   void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
+
+  unsigned getSRetReturnReg() const { return SRetReturnReg; }
+  void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
 };
 } // End llvm namespace
 

Added: llvm/trunk/test/CodeGen/X86/x86-64-sret-return.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-64-sret-return.ll?rev=50075&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-64-sret-return.ll (added)
+++ llvm/trunk/test/CodeGen/X86/x86-64-sret-return.ll Mon Apr 21 18:59:07 2008
@@ -0,0 +1,54 @@
+; RUN: llvm-as < %s | llc | grep {movq	%rdi, %rax}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+	%struct.foo = type { [4 x i64] }
+
+define void @bar(%struct.foo* noalias sret  %agg.result, %struct.foo* %d) nounwind  {
+entry:
+	%d_addr = alloca %struct.foo*		; <%struct.foo**> [#uses=2]
+	%memtmp = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=1]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store %struct.foo* %d, %struct.foo** %d_addr
+	%tmp = load %struct.foo** %d_addr, align 8		; <%struct.foo*> [#uses=1]
+	%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
+	%tmp2 = getelementptr %struct.foo* %tmp, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
+	%tmp3 = getelementptr [4 x i64]* %tmp1, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp4 = getelementptr [4 x i64]* %tmp2, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp5 = load i64* %tmp4, align 8		; <i64> [#uses=1]
+	store i64 %tmp5, i64* %tmp3, align 8
+	%tmp6 = getelementptr [4 x i64]* %tmp1, i32 0, i32 1		; <i64*> [#uses=1]
+	%tmp7 = getelementptr [4 x i64]* %tmp2, i32 0, i32 1		; <i64*> [#uses=1]
+	%tmp8 = load i64* %tmp7, align 8		; <i64> [#uses=1]
+	store i64 %tmp8, i64* %tmp6, align 8
+	%tmp9 = getelementptr [4 x i64]* %tmp1, i32 0, i32 2		; <i64*> [#uses=1]
+	%tmp10 = getelementptr [4 x i64]* %tmp2, i32 0, i32 2		; <i64*> [#uses=1]
+	%tmp11 = load i64* %tmp10, align 8		; <i64> [#uses=1]
+	store i64 %tmp11, i64* %tmp9, align 8
+	%tmp12 = getelementptr [4 x i64]* %tmp1, i32 0, i32 3		; <i64*> [#uses=1]
+	%tmp13 = getelementptr [4 x i64]* %tmp2, i32 0, i32 3		; <i64*> [#uses=1]
+	%tmp14 = load i64* %tmp13, align 8		; <i64> [#uses=1]
+	store i64 %tmp14, i64* %tmp12, align 8
+	%tmp15 = getelementptr %struct.foo* %memtmp, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
+	%tmp16 = getelementptr %struct.foo* %agg.result, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
+	%tmp17 = getelementptr [4 x i64]* %tmp15, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp18 = getelementptr [4 x i64]* %tmp16, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp19 = load i64* %tmp18, align 8		; <i64> [#uses=1]
+	store i64 %tmp19, i64* %tmp17, align 8
+	%tmp20 = getelementptr [4 x i64]* %tmp15, i32 0, i32 1		; <i64*> [#uses=1]
+	%tmp21 = getelementptr [4 x i64]* %tmp16, i32 0, i32 1		; <i64*> [#uses=1]
+	%tmp22 = load i64* %tmp21, align 8		; <i64> [#uses=1]
+	store i64 %tmp22, i64* %tmp20, align 8
+	%tmp23 = getelementptr [4 x i64]* %tmp15, i32 0, i32 2		; <i64*> [#uses=1]
+	%tmp24 = getelementptr [4 x i64]* %tmp16, i32 0, i32 2		; <i64*> [#uses=1]
+	%tmp25 = load i64* %tmp24, align 8		; <i64> [#uses=1]
+	store i64 %tmp25, i64* %tmp23, align 8
+	%tmp26 = getelementptr [4 x i64]* %tmp15, i32 0, i32 3		; <i64*> [#uses=1]
+	%tmp27 = getelementptr [4 x i64]* %tmp16, i32 0, i32 3		; <i64*> [#uses=1]
+	%tmp28 = load i64* %tmp27, align 8		; <i64> [#uses=1]
+	store i64 %tmp28, i64* %tmp26, align 8
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}





More information about the llvm-commits mailing list