[llvm] r373226 - [AArch64][GlobalISel] Support lowering variadic musttail calls

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 30 09:49:13 PDT 2019


Author: paquette
Date: Mon Sep 30 09:49:13 2019
New Revision: 373226

URL: http://llvm.org/viewvc/llvm-project?rev=373226&view=rev
Log:
[AArch64][GlobalISel] Support lowering variadic musttail calls

This adds support for lowering variadic musttail calls. To do this, we have to
do the following (see the sketch after the list):

- Detect a musttail call in a variadic function before attempting to lower the
  function's formal arguments. This is done in the IRTranslator.
- Compute forwarded registers in `lowerFormalArguments`, and add copies for
  those registers.
- Restore the forwarded registers in `lowerTailCall`.
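A condensed sketch of the first two steps, using the same names as the patch
below (`HasMustTailInVarArgFn`, `Forwards`, `CCInfo`, `MIRBuilder`); this is a
summary of the change, not a drop-in, and the restore step is sketched further
below:

  // (1) IRTranslator: record on MachineFrameInfo whether any block of a
  //     vararg function contains a musttail call.
  MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);

  // (2) lowerFormalArguments: compute the registers that must be forwarded
  //     and save each one into a virtual register with a COPY.
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
  for (const ForwardedRegister &FR : Forwards) {
    MBB.addLiveIn(FR.PReg);
    MIRBuilder.buildCopy(Register(FR.VReg), Register(FR.PReg));
  }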

Because there doesn't seem to be a nice way to fold the restore copies into the
outgoing argument handler, the restore code in `lowerTailCall` is emitted
separately.

Also, irritatingly, you have to make sure that the forwarded registers don't
overlap with any of the registers used to pass parameters. Otherwise, the
scheduler doesn't know what to do with the extra copies and asserts.
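
For reference, the restore side in `lowerTailCall`, including that overlap
check, boils down to roughly this (condensed from the patch below, again using
its names; not a drop-in):

  // (3) lowerTailCall: rematerialize each forwarded register from its saved
  //     virtual register, but skip anything that is already passed to the
  //     call (or aliases a register that is), so the extra copy doesn't
  //     conflict with a real argument.
  for (const ForwardedRegister &FR : Forwards) {
    Register ForwardedReg = FR.PReg;
    bool AlreadyPassed = any_of(MIB->uses(), [&](const MachineOperand &Use) {
      return Use.isReg() && TRI->regsOverlap(Use.getReg(), ForwardedReg);
    });
    if (AlreadyPassed)
      continue;
    MIRBuilder.buildCopy(ForwardedReg, Register(FR.VReg));
    MIB.addReg(ForwardedReg, RegState::Implicit);
  }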

Add call-translator-variadic-musttail.ll to test this. It is pretty much the
same as the X86 musttail-varargs.ll test. We didn't have as nice a test to base
this on, but the idea is the same.

Differential Revision: https://reviews.llvm.org/D68043

Added:
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
Modified:
    llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
    llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp

Modified: llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp?rev=373226&r1=373225&r2=373226&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp Mon Sep 30 09:49:13 2019
@@ -2193,6 +2193,20 @@ void IRTranslator::finalizeFunction() {
   FuncInfo.clear();
 }
 
+/// Returns true if a BasicBlock \p BB within a variadic function contains a
+/// variadic musttail call.
+static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
+  if (!IsVarArg)
+    return false;
+
+  // Walk the block backwards, because tail calls usually only appear at the end
+  // of a block.
+  return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
+    const auto *CI = dyn_cast<CallInst>(&I);
+    return CI && CI->isMustTailCall();
+  });
+}
+
 bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MF = &CurMF;
   const Function &F = MF->getFunction();
@@ -2254,6 +2268,9 @@ bool IRTranslator::runOnMachineFunction(
   SwiftError.setFunction(CurMF);
   SwiftError.createEntriesInEntryBlock(DbgLoc);
 
+  bool IsVarArg = F.isVarArg();
+  bool HasMustTailInVarArgFn = false;
+
   // Create all blocks, in IR order, to preserve the layout.
   for (const BasicBlock &BB: F) {
     auto *&MBB = BBToMBB[&BB];
@@ -2263,8 +2280,13 @@ bool IRTranslator::runOnMachineFunction(
 
     if (BB.hasAddressTaken())
       MBB->setHasAddressTaken();
+
+    if (!HasMustTailInVarArgFn)
+      HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
   }
 
+  MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);
+
   // Make our arguments/constants entry block fallthrough to the IR entry block.
   EntryBB->addSuccessor(&getMBB(F.front()));
 

Modified: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp?rev=373226&r1=373225&r2=373226&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp Mon Sep 30 09:49:13 2019
@@ -368,6 +368,49 @@ bool AArch64CallLowering::lowerReturn(Ma
   return Success;
 }
 
+/// Helper function to compute forwarded registers for musttail calls. Computes
+/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
+/// can be used to save + restore registers later.
+static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
+                                             CCAssignFn *AssignFn) {
+  MachineBasicBlock &MBB = MIRBuilder.getMBB();
+  MachineFunction &MF = MIRBuilder.getMF();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (!MFI.hasMustTailInVarArgFunc())
+    return;
+
+  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  const Function &F = MF.getFunction();
+  assert(F.isVarArg() && "Expected F to be vararg?");
+
+  // Compute the set of forwarded registers. The rest are scratch.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
+                 F.getContext());
+  SmallVector<MVT, 2> RegParmTypes;
+  RegParmTypes.push_back(MVT::i64);
+  RegParmTypes.push_back(MVT::f128);
+
+  // Later on, we can use this vector to restore the registers if necessary.
+  SmallVectorImpl<ForwardedRegister> &Forwards =
+      FuncInfo->getForwardedMustTailRegParms();
+  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
+
+  // Conservatively forward X8, since it might be used for an aggregate
+  // return.
+  if (!CCInfo.isAllocated(AArch64::X8)) {
+    unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
+  }
+
+  // Add the forwards to the MachineBasicBlock and MachineFunction.
+  for (const auto &F : Forwards) {
+    MBB.addLiveIn(F.PReg);
+    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
+  }
+}
+
 bool AArch64CallLowering::lowerFormalArguments(
     MachineIRBuilder &MIRBuilder, const Function &F,
     ArrayRef<ArrayRef<Register>> VRegs) const {
@@ -441,6 +484,8 @@ bool AArch64CallLowering::lowerFormalArg
   if (Subtarget.hasCustomCallingConv())
     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
 
+  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
+
   // Move back to the end of the basic block.
   MIRBuilder.setMBB(MBB);
 
@@ -695,16 +740,6 @@ bool AArch64CallLowering::isEligibleForT
   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
          "Unexpected variadic calling convention");
 
-  // Before we can musttail varargs, we need to forward parameters like in
-  // r345641. Make sure that we don't enable musttail with varargs without
-  // addressing that!
-  if (Info.IsVarArg && Info.IsMustTailCall) {
-    LLVM_DEBUG(
-        dbgs()
-        << "... Cannot handle vararg musttail functions yet.\n");
-    return false;
-  }
-
   // Verify that the incoming and outgoing arguments from the callee are
   // safe to tail call.
   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
@@ -745,6 +780,7 @@ bool AArch64CallLowering::lowerTailCall(
   const Function &F = MF.getFunction();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 
   // True when we're tail calling, but without -tailcallopt.
   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
@@ -800,7 +836,6 @@ bool AArch64CallLowering::lowerTailCall(
     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
     // before handling assignments, because FPDiff must be known for memory
     // arguments.
-    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
     SmallVector<CCValAssign, 16> OutLocs;
     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
@@ -823,6 +858,8 @@ bool AArch64CallLowering::lowerTailCall(
     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
   }
 
+  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
+
   // Do the actual argument marshalling.
   SmallVector<unsigned, 8> PhysRegs;
   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
@@ -830,6 +867,27 @@ bool AArch64CallLowering::lowerTailCall(
   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
     return false;
 
+  if (Info.IsVarArg && Info.IsMustTailCall) {
+    // Now we know what's being passed to the function. Add uses to the call for
+    // the forwarded registers that we *aren't* passing as parameters. This will
+    // preserve the copies we build earlier.
+    for (const auto &F : Forwards) {
+      Register ForwardedReg = F.PReg;
+      // If the register is already passed, or aliases a register which is
+      // already being passed, then skip it.
+      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
+            if (!Use.isReg())
+              return false;
+            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
+          }))
+        continue;
+
+      // We aren't passing it already, so we should add it to the call.
+      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
+      MIB.addReg(ForwardedReg, RegState::Implicit);
+    }
+  }
+
   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
   // sequence start and end here.
   if (!IsSibCall) {

Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll?rev=373226&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll Mon Sep 30 09:49:13 2019
@@ -0,0 +1,223 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s
+
+; There are two things we want to test here:
+;  (1) We can tail call musttail calls.
+;  (2) We spill and reload all of the arguments around a normal call.
+
+declare i32 @musttail_variadic_callee(i32, ...)
+define i32 @test_musttail_variadic(i32 %arg0, ...) {
+; CHECK-LABEL: test_musttail_variadic:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    b _musttail_variadic_callee
+  %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...)
+  ret i32 %r
+}
+
+declare [2 x i64] @musttail_variadic_aggret_callee(i32 %arg0, ...)
+define [2 x i64] @test_musttail_variadic_aggret(i32 %arg0, ...) {
+; CHECK-LABEL: test_musttail_variadic_aggret:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    b _musttail_variadic_aggret_callee
+  %r = musttail call [2 x i64] (i32, ...) @musttail_variadic_aggret_callee(i32 %arg0, ...)
+  ret [2 x i64] %r
+}
+
+; Test musttailing with a normal call in the block. Test that we spill and
+; restore, as a normal call will clobber all argument registers.
+ at asdf = internal constant [4 x i8] c"asdf"
+declare void @puts(i8*)
+define i32 @test_musttail_variadic_spill(i32 %arg0, ...) {
+; CHECK-LABEL: test_musttail_variadic_spill:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #224 ; =224
+; CHECK-NEXT:    stp x28, x27, [sp, #128] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #144] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #160] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #176] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #192] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #208] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 224
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w21, -40
+; CHECK-NEXT:    .cfi_offset w22, -48
+; CHECK-NEXT:    .cfi_offset w23, -56
+; CHECK-NEXT:    .cfi_offset w24, -64
+; CHECK-NEXT:    .cfi_offset w25, -72
+; CHECK-NEXT:    .cfi_offset w26, -80
+; CHECK-NEXT:    .cfi_offset w27, -88
+; CHECK-NEXT:    .cfi_offset w28, -96
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x0, _asdf at PAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    add x0, x0, _asdf at PAGEOFF
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    mov x21, x2
+; CHECK-NEXT:    mov x22, x3
+; CHECK-NEXT:    mov x23, x4
+; CHECK-NEXT:    mov x24, x5
+; CHECK-NEXT:    mov x25, x6
+; CHECK-NEXT:    mov x26, x7
+; CHECK-NEXT:    stp q1, q0, [sp, #96] ; 32-byte Folded Spill
+; CHECK-NEXT:    stp q3, q2, [sp, #64] ; 32-byte Folded Spill
+; CHECK-NEXT:    stp q5, q4, [sp, #32] ; 32-byte Folded Spill
+; CHECK-NEXT:    stp q7, q6, [sp] ; 32-byte Folded Spill
+; CHECK-NEXT:    mov x27, x8
+; CHECK-NEXT:    bl _puts
+; CHECK-NEXT:    ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
+; CHECK-NEXT:    ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
+; CHECK-NEXT:    ldp q5, q4, [sp, #32] ; 32-byte Folded Reload
+; CHECK-NEXT:    ldp q7, q6, [sp] ; 32-byte Folded Reload
+; CHECK-NEXT:    mov w0, w19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    mov x4, x23
+; CHECK-NEXT:    mov x5, x24
+; CHECK-NEXT:    mov x6, x25
+; CHECK-NEXT:    mov x7, x26
+; CHECK-NEXT:    mov x8, x27
+; CHECK-NEXT:    ldp x29, x30, [sp, #208] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #192] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #176] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #160] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #144] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #128] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #224 ; =224
+; CHECK-NEXT:    b _musttail_variadic_callee
+; CHECK-NEXT:    .loh AdrpAdd Lloh0, Lloh1
+  call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
+  %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...)
+  ret i32 %r
+}
+
+; Test musttailing with a varargs call in the block. Test that we spill and
+; reload all arguments in the variadic argument pack.
+declare void @llvm.va_start(i8*) nounwind
+declare void(i8*, ...)* @get_f(i8* %this)
+define void @f_thunk(i8* %this, ...) {
+; CHECK-LABEL: f_thunk:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #256 ; =256
+; CHECK-NEXT:    stp x28, x27, [sp, #160] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #176] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #192] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #208] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #224] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #240] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 256
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w21, -40
+; CHECK-NEXT:    .cfi_offset w22, -48
+; CHECK-NEXT:    .cfi_offset w23, -56
+; CHECK-NEXT:    .cfi_offset w24, -64
+; CHECK-NEXT:    .cfi_offset w25, -72
+; CHECK-NEXT:    .cfi_offset w26, -80
+; CHECK-NEXT:    .cfi_offset w27, -88
+; CHECK-NEXT:    .cfi_offset w28, -96
+; CHECK-NEXT:    mov x27, x8
+; CHECK-NEXT:    add x8, sp, #128 ; =128
+; CHECK-NEXT:    add x9, sp, #256 ; =256
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    mov x21, x2
+; CHECK-NEXT:    mov x22, x3
+; CHECK-NEXT:    mov x23, x4
+; CHECK-NEXT:    mov x24, x5
+; CHECK-NEXT:    mov x25, x6
+; CHECK-NEXT:    mov x26, x7
+; CHECK-NEXT:    stp q1, q0, [sp, #96] ; 32-byte Folded Spill
+; CHECK-NEXT:    stp q3, q2, [sp, #64] ; 32-byte Folded Spill
+; CHECK-NEXT:    stp q5, q4, [sp, #32] ; 32-byte Folded Spill
+; CHECK-NEXT:    stp q7, q6, [sp] ; 32-byte Folded Spill
+; CHECK-NEXT:    str x9, [x8]
+; CHECK-NEXT:    bl _get_f
+; CHECK-NEXT:    mov x9, x0
+; CHECK-NEXT:    ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
+; CHECK-NEXT:    ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
+; CHECK-NEXT:    ldp q5, q4, [sp, #32] ; 32-byte Folded Reload
+; CHECK-NEXT:    ldp q7, q6, [sp] ; 32-byte Folded Reload
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    mov x4, x23
+; CHECK-NEXT:    mov x5, x24
+; CHECK-NEXT:    mov x6, x25
+; CHECK-NEXT:    mov x7, x26
+; CHECK-NEXT:    mov x8, x27
+; CHECK-NEXT:    ldp x29, x30, [sp, #240] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #224] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #208] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #192] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #176] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #160] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #256 ; =256
+; CHECK-NEXT:    br x9
+  %ap = alloca [4 x i8*], align 16
+  %ap_i8 = bitcast [4 x i8*]* %ap to i8*
+  call void @llvm.va_start(i8* %ap_i8)
+  %fptr = call void(i8*, ...)*(i8*) @get_f(i8* %this)
+  musttail call void (i8*, ...) %fptr(i8* %this, ...)
+  ret void
+}
+
+; We don't need any spills and reloads here, but we should still emit the
+; copies in call lowering.
+define void @g_thunk(i8* %fptr_i8, ...) {
+; CHECK-LABEL: g_thunk:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    br x0
+  %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
+  musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
+  ret void
+}
+
+; Test that this works with multiple exits and basic blocks.
+%struct.Foo = type { i1, i8*, i8* }
+ at g = external global i32
+define void @h_thunk(%struct.Foo* %this, ...) {
+; CHECK-LABEL: h_thunk:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrb w9, [x0]
+; CHECK-NEXT:    tbz w9, #0, LBB5_2
+; CHECK-NEXT:  ; %bb.1: ; %then
+; CHECK-NEXT:    ldr x9, [x0, #8]
+; CHECK-NEXT:    br x9
+; CHECK-NEXT:  LBB5_2: ; %else
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    adrp x10, _g at GOTPAGE
+; CHECK-NEXT:    ldr x9, [x0, #16]
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    ldr x10, [x10, _g at GOTPAGEOFF]
+; CHECK-NEXT:    mov w11, #42
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    str w11, [x10]
+; CHECK-NEXT:    br x9
+; CHECK-NEXT:    .loh AdrpLdrGotStr Lloh2, Lloh3, Lloh4
+  %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
+  %cond = load i1, i1* %cond_p
+  br i1 %cond, label %then, label %else
+
+then:
+  %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1
+  %a_i8 = load i8*, i8** %a_p
+  %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
+  musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...)
+  ret void
+
+else:
+  %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2
+  %b_i8 = load i8*, i8** %b_p
+  %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
+  store i32 42, i32* @g
+  musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...)
+  ret void
+}



