[llvm] 376d7b2 - [ARM] Optimise byval arguments in tail-calls

Oliver Stannard via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 25 01:35:04 PDT 2024


Author: Oliver Stannard
Date: 2024-10-25T09:34:09+01:00
New Revision: 376d7b27fa3de4f72c2a3cec4f941c1ca3f1d7f2

URL: https://github.com/llvm/llvm-project/commit/376d7b27fa3de4f72c2a3cec4f941c1ca3f1d7f2
DIFF: https://github.com/llvm/llvm-project/commit/376d7b27fa3de4f72c2a3cec4f941c1ca3f1d7f2.diff

LOG: [ARM] Optimise byval arguments in tail-calls

We don't need to copy byval arguments to tail calls via a temporary if we
can prove that we are not copying from the outgoing argument area. This
patch does this when the source of the argument is one of the following
(see the IR sketch below):
* Memory in the local stack frame, which can't be used for tail-call
  arguments.
* A global variable.
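
A minimal sketch of these two cases, using hypothetical function and
global names (the new tests in musttail.ll below exercise the same
patterns):

  %twenty_bytes = type { [5 x i32] }
  declare void @callee(%twenty_bytes* byval(%twenty_bytes) align 4)
  @g = external global %twenty_bytes

  ; Source is a local alloca: it cannot overlap the outgoing argument
  ; area, so a single copy straight to the final location is enough.
  define void @from_local() {
  entry:
    %b = alloca %twenty_bytes, align 4
    tail call void @callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b)
    ret void
  }

  ; Source is a global: also always safe to copy from directly.
  define void @from_global() {
  entry:
    tail call void @callee(%twenty_bytes* byval(%twenty_bytes) align 4 @g)
    ret void
  }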

We can also avoid doing the copy completely if the source and
destination are the same memory location, which is the case when the
caller and callee have the same signature and the caller passes some
arguments through unmodified.
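
Reusing the declarations from the sketch above, the zero-copy case looks
like this (again hypothetical; @large_caller in musttail.ll is the real
test for it):

  ; Source and destination coincide: %a is forwarded unmodified to a
  ; callee with the same signature, so no copy at all is needed.
  define void @forward(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
  entry:
    musttail call void @callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
    ret void
  }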

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/lib/Target/ARM/ARMISelLowering.h
    llvm/test/CodeGen/ARM/musttail.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6cc0b681193f55..a98b7a8420927e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2325,6 +2325,59 @@ std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
   return std::make_pair(DstAddr, DstInfo);
 }
 
+// Returns the type of copying which is required to set up a byval argument to
+// a tail-called function. This isn't needed for non-tail calls, because they
+// always need the equivalent of CopyOnce, but tail-calls sometimes need two
+// copies to avoid clobbering another argument (CopyViaTemp), and sometimes
+// can be optimised to zero copies when forwarding an argument from the caller's
+// caller (NoCopy).
+ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
+    SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
+  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+
+  // Globals are always safe to copy from.
+  if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
+    return CopyOnce;
+
+  // We can only analyse frame index nodes; conservatively assume we need a
+  // temporary.
+  auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
+  auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
+  if (!SrcFrameIdxNode || !DstFrameIdxNode)
+    return CopyViaTemp;
+
+  int SrcFI = SrcFrameIdxNode->getIndex();
+  int DstFI = DstFrameIdxNode->getIndex();
+  assert(MFI.isFixedObjectIndex(DstFI) &&
+         "byval passed in non-fixed stack slot");
+
+  int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
+  int64_t DstOffset = MFI.getObjectOffset(DstFI);
+
+  // If the source is in the local frame, then the copy to the argument memory
+  // is always valid.
+  bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
+  if (!FixedSrc ||
+      (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
+    return CopyOnce;
+
+  // In the case of byval arguments split between registers and the stack,
+  // computeAddrForCallArg returns a FrameIndex which corresponds only to the
+  // stack portion, but the Src SDValue will refer to the full value, including
+  // the local stack memory that the register portion gets stored into. We only
+  // need to compare them for equality, so normalise on the full value version.
+  uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
+  DstOffset -= RegSize;
+
+  // If the value is already in the correct location, then no copying is
+  // needed. If not, then we need to copy via a temporary.
+  if (SrcOffset == DstOffset)
+    return NoCopy;
+  else
+    return CopyViaTemp;
+}
+
 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
                                          SDValue Chain, SDValue &Arg,
                                          RegsToPassVector &RegsToPass,
@@ -2499,37 +2552,58 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // overwritten by the stores of the outgoing arguments. To avoid this, we
   // need to make a temporary copy of them in local stack space, then copy back
   // to the argument area.
-  // TODO This could be optimised to skip byvals which are already being copied
-  // from local stack space, or which are copied from the incoming stack at the
-  // exact same location.
   DenseMap<unsigned, SDValue> ByValTemporaries;
   SDValue ByValTempChain;
   if (isTailCall) {
-    for (unsigned ArgIdx = 0, e = OutVals.size(); ArgIdx != e; ++ArgIdx) {
-      SDValue Arg = OutVals[ArgIdx];
+    SmallVector<SDValue, 8> ByValCopyChains;
+    for (const CCValAssign &VA : ArgLocs) {
+      unsigned ArgIdx = VA.getValNo();
+      SDValue Src = OutVals[ArgIdx];
       ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
 
-      if (Flags.isByVal()) {
-        int FrameIdx = MFI.CreateStackObject(
+      if (!Flags.isByVal())
+        continue;
+
+      SDValue Dst;
+      MachinePointerInfo DstInfo;
+      std::tie(Dst, DstInfo) =
+          computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
+      ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
+
+      if (Copy == NoCopy) {
+        // If the argument is already at the correct offset on the stack
+        // (because we are forwarding a byval argument from our caller), we
+        // don't need any copying.
+        continue;
+      } else if (Copy == CopyOnce) {
+        // If the argument is in our local stack frame, no other argument
+        // preparation can clobber it, so we can copy it to the final location
+        // later.
+        ByValTemporaries[ArgIdx] = Src;
+      } else {
+        assert(Copy == CopyViaTemp && "unexpected enum value");
+        // If we might be copying this argument from the outgoing argument
+        // stack area, we need to copy via a temporary in the local stack
+        // frame.
+        int TempFrameIdx = MFI.CreateStackObject(
             Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
-        SDValue Dst =
-            DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
+        SDValue Temp =
+            DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
 
         SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
         SDValue AlignNode =
             DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
 
         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
-        SDValue Ops[] = { Chain, Dst, Arg, SizeNode, AlignNode};
-        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
-                                          Ops));
-        ByValTemporaries[ArgIdx] = Dst;
+        SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
+        ByValCopyChains.push_back(
+            DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
+        ByValTemporaries[ArgIdx] = Temp;
       }
     }
-    if (!MemOpChains.empty()) {
-      ByValTempChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
-      MemOpChains.clear();
-    }
+    if (!ByValCopyChains.empty())
+      ByValTempChain =
+          DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
   }
 
   // During a tail call, stores to the argument area must happen after all of
@@ -2644,13 +2718,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
 
       SDValue ByValSrc;
-      if (ByValTemporaries.contains(realArgIdx))
+      bool NeedsStackCopy;
+      if (ByValTemporaries.contains(realArgIdx)) {
         ByValSrc = ByValTemporaries[realArgIdx];
-      else
+        NeedsStackCopy = true;
+      } else {
         ByValSrc = Arg;
+        NeedsStackCopy = !isTailCall;
+      }
 
+      // If part of the argument is passed in registers, load those registers.
       if (CurByValIdx < ByValArgsCount) {
-
         unsigned RegBegin, RegEnd;
         CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
 
@@ -2674,7 +2752,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         CCInfo.nextInRegsParam();
       }
 
-      if (Flags.getByValSize() > 4*offset) {
+      // If the memory part of the argument isn't already in the correct place
+      // (which can happen with tail calls), copy it into the argument area.
+      if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
         auto PtrVT = getPointerTy(DAG.getDataLayout());
         SDValue Dst;
         MachinePointerInfo DstInfo;

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index ef651bc3d84c0f..0e086f3340ccb4 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -395,6 +395,19 @@ class VectorType;
   //  ARMTargetLowering - ARM Implementation of the TargetLowering interface
 
   class ARMTargetLowering : public TargetLowering {
+    // Copying needed for an outgoing byval argument.
+    enum ByValCopyKind {
+      // Argument is already in the correct location, no copy needed.
+      NoCopy,
+      // Argument value is currently in the local stack frame, needs copying to
+      // the outgoing argument area.
+      CopyOnce,
+      // Argument value is currently in the outgoing argument area, but not at
+      // the correct offset, so needs copying via a temporary in local stack
+      // space.
+      CopyViaTemp,
+    };
+
   public:
     explicit ARMTargetLowering(const TargetMachine &TM,
                                const ARMSubtarget &STI);
@@ -809,6 +822,9 @@ class VectorType;
     computeAddrForCallArg(const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA, SDValue StackPtr,
                           bool IsTailCall, int SPDiff) const;
+    ByValCopyKind ByValNeedsCopyForTailCall(SelectionDAG &DAG, SDValue Src,
+                                            SDValue Dst,
+                                            ISD::ArgFlagsTy Flags) const;
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;

diff --git a/llvm/test/CodeGen/ARM/musttail.ll b/llvm/test/CodeGen/ARM/musttail.ll
index 3c577134f696d8..aa7b81d0700218 100644
--- a/llvm/test/CodeGen/ARM/musttail.ll
+++ b/llvm/test/CodeGen/ARM/musttail.ll
@@ -129,40 +129,15 @@ declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
 ; actually passed in registers and the stack in the same way for the caller and
 ; callee. Within @large_caller the first 16 bytes of the argument are spilled
 ; to the local stack frame, but for the tail-call they are passed in r0-r3, so
-; it's safe to de-allocate that memory before the call. Most of the code
-; generated for this isn't needed, but that's a missed optimisation, not a
-; correctness issue.
+; it's safe to de-allocate that memory before the call.
+; TODO: The SUB and STM instructions are unnecessary and could be optimised
+; out, but the generated code is still correct.
 define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
 ; CHECK-LABEL: large_caller:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .pad #16
 ; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    .pad #20
-; CHECK-NEXT:    sub sp, sp, #20
-; CHECK-NEXT:    add r12, sp, #28
-; CHECK-NEXT:    add lr, sp, #44
-; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    add r0, sp, #28
-; CHECK-NEXT:    mov r1, sp
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    add r12, r1, #16
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldm sp, {r0, r1, r2, r3}
-; CHECK-NEXT:    ldr r4, [r12], #4
-; CHECK-NEXT:    str r4, [lr], #4
-; CHECK-NEXT:    add sp, sp, #20
-; CHECK-NEXT:    pop {r4, lr}
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    stm sp!, {r0, r1, r2, r3}
 ; CHECK-NEXT:    b large_callee
 entry:
   musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
@@ -176,34 +151,10 @@ define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .pad #16
 ; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    .pad #20
-; CHECK-NEXT:    sub sp, sp, #20
-; CHECK-NEXT:    add r12, sp, #28
-; CHECK-NEXT:    add lr, sp, #44
-; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
+; CHECK-NEXT:    stm sp, {r0, r1, r2, r3}
 ; CHECK-NEXT:    @APP
 ; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    add r0, sp, #28
-; CHECK-NEXT:    mov r1, sp
-; CHECK-NEXT:    add r12, r1, #16
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldm sp, {r0, r1, r2, r3}
-; CHECK-NEXT:    ldr r4, [r12], #4
-; CHECK-NEXT:    str r4, [lr], #4
-; CHECK-NEXT:    add sp, sp, #20
-; CHECK-NEXT:    pop {r4, lr}
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    pop {r0, r1, r2, r3}
 ; CHECK-NEXT:    b large_callee
 entry:
   tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
@@ -218,44 +169,30 @@ entry:
 define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
 ; CHECK-LABEL: large_caller_new_value:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    .pad #40
-; CHECK-NEXT:    sub sp, sp, #40
-; CHECK-NEXT:    add r12, sp, #48
-; CHECK-NEXT:    add lr, sp, #64
+; CHECK-NEXT:    .pad #36
+; CHECK-NEXT:    sub sp, sp, #36
+; CHECK-NEXT:    add r12, sp, #20
 ; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, #4
-; CHECK-NEXT:    mov r1, sp
-; CHECK-NEXT:    str r0, [sp, #36]
+; CHECK-NEXT:    add r1, sp, #36
+; CHECK-NEXT:    str r0, [sp, #16]
 ; CHECK-NEXT:    mov r0, #3
-; CHECK-NEXT:    str r0, [sp, #32]
+; CHECK-NEXT:    str r0, [sp, #12]
 ; CHECK-NEXT:    mov r0, #2
-; CHECK-NEXT:    str r0, [sp, #28]
+; CHECK-NEXT:    str r0, [sp, #8]
 ; CHECK-NEXT:    mov r0, #1
-; CHECK-NEXT:    str r0, [sp, #24]
+; CHECK-NEXT:    str r0, [sp, #4]
 ; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    str r0, [sp, #20]
-; CHECK-NEXT:    add r0, sp, #20
-; CHECK-NEXT:    add r12, r1, #16
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldr r2, [r0], #4
-; CHECK-NEXT:    str r2, [r1], #4
+; CHECK-NEXT:    str r0, [sp]
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    add r0, r0, #16
+; CHECK-NEXT:    mov r3, #3
 ; CHECK-NEXT:    ldr r2, [r0], #4
 ; CHECK-NEXT:    str r2, [r1], #4
-; CHECK-NEXT:    ldm sp, {r0, r1, r2, r3}
-; CHECK-NEXT:    ldr r4, [r12], #4
-; CHECK-NEXT:    str r4, [lr], #4
-; CHECK-NEXT:    add sp, sp, #40
-; CHECK-NEXT:    pop {r4, lr}
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    mov r1, #1
+; CHECK-NEXT:    mov r2, #2
+; CHECK-NEXT:    add sp, sp, #36
 ; CHECK-NEXT:    b large_callee
 entry:
   %y = alloca %twenty_bytes, align 4
@@ -335,3 +272,71 @@ entry:
   musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a)
   ret void
 }
+
+; A forwarded byval arg, but at a different offset on the stack, so it needs to
+; be copied to the local stack frame first. This can't be musttail because of
+; the different signatures, but is still tail-called as an optimisation.
+declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
+define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
+; CHECK-LABEL: shift_byval:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #12
+; CHECK-NEXT:    sub sp, sp, #12
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    .pad #20
+; CHECK-NEXT:    sub sp, sp, #20
+; CHECK-NEXT:    add r0, sp, #28
+; CHECK-NEXT:    add lr, sp, #40
+; CHECK-NEXT:    stm r0, {r1, r2, r3}
+; CHECK-NEXT:    add r0, sp, #28
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    ldr r2, [r0], #4
+; CHECK-NEXT:    add r12, r1, #16
+; CHECK-NEXT:    str r2, [r1], #4
+; CHECK-NEXT:    ldr r2, [r0], #4
+; CHECK-NEXT:    str r2, [r1], #4
+; CHECK-NEXT:    ldr r2, [r0], #4
+; CHECK-NEXT:    str r2, [r1], #4
+; CHECK-NEXT:    ldr r2, [r0], #4
+; CHECK-NEXT:    str r2, [r1], #4
+; CHECK-NEXT:    ldr r2, [r0], #4
+; CHECK-NEXT:    str r2, [r1], #4
+; CHECK-NEXT:    ldm sp, {r0, r1, r2, r3}
+; CHECK-NEXT:    ldr r4, [r12], #4
+; CHECK-NEXT:    str r4, [lr], #4
+; CHECK-NEXT:    add sp, sp, #20
+; CHECK-NEXT:    pop {r4, lr}
+; CHECK-NEXT:    add sp, sp, #12
+; CHECK-NEXT:    b shift_byval_callee
+entry:
+  tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b)
+  ret void
+}
+
+; A global object passed to a byval argument, so it must be copied, but doesn't
+; need a stack temporary.
+@large_global = external global %twenty_bytes
+define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
+; CHECK-LABEL: large_caller_from_global:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    add r12, sp, #8
+; CHECK-NEXT:    add lr, sp, #24
+; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
+; CHECK-NEXT:    movw r3, :lower16:large_global
+; CHECK-NEXT:    movt r3, :upper16:large_global
+; CHECK-NEXT:    add r12, r3, #16
+; CHECK-NEXT:    ldm r3, {r0, r1, r2, r3}
+; CHECK-NEXT:    ldr r4, [r12], #4
+; CHECK-NEXT:    str r4, [lr], #4
+; CHECK-NEXT:    pop {r4, lr}
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    b large_callee
+entry:
+  musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global)
+  ret void
+}
