[llvm] pr/amdgpu closed world (PR #66488)

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 15 03:42:57 PDT 2023


https://github.com/jdoerfert created https://github.com/llvm/llvm-project/pull/66488

None

>From a4753015bae5fa9b762487bb722701fcf4c30b55 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Fri, 15 Sep 2023 03:33:46 -0700
Subject: [PATCH 1/2] [Attributor][NFC] Add NumCallees argument to callback

---
 llvm/include/llvm/Transforms/IPO/Attributor.h    | 7 ++++---
 llvm/lib/Transforms/IPO/Attributor.cpp           | 2 +-
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index bd1bd8261123e51..9649cfd668b3a09 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1717,10 +1717,11 @@ struct Attributor {
   /// Return true if we should specialize the call site \b CB for the potential
   /// callee \p Fn.
   bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA,
-                                         CallBase &CB, Function &Callee) {
+                                         CallBase &CB, Function &Callee,
+                                         unsigned NumCallees) {
     return Configuration.IndirectCalleeSpecializationCallback
-               ? Configuration.IndirectCalleeSpecializationCallback(*this, AA,
-                                                                    CB, Callee)
+               ? Configuration.IndirectCalleeSpecializationCallback(
+                     *this, AA, CB, Callee, NumCallees)
                : true;
   }
 
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 1ffafc65ba63a4f..5b5a9a28f6d3838 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3821,7 +3821,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
   if (MaxSpecializationPerCB.getNumOccurrences()) {
     AC.IndirectCalleeSpecializationCallback =
         [&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
-            Function &Callee) {
+            Function &Callee, unsigned NumCallees) {
           if (MaxSpecializationPerCB == 0)
             return false;
           auto &Set = IndirectCalleeTrackingMap[&CB];
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 03b5dc3899ac8f8..86c6bb04368e241 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12352,7 +12352,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
     SmallVector<Function *, 8> SkippedAssumedCallees;
     SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
     for (Function *NewCallee : AssumedCallees) {
-      if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
+      if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee,
+                                               AssumedCallees.size())) {
         SkippedAssumedCallees.push_back(NewCallee);
         SpecializedForAllCallees = false;
         continue;

>From 0f7358da9f44281f06d336cb25ddf9163c00f75a Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Wed, 23 Aug 2023 17:28:51 -0700
Subject: [PATCH 2/2] [Attributor][AMDGPU] Improve indirect call support in
 closed modules

If we can see every function that may be called, i.e., the module is a
"closed world", we can reason more precisely about indirect calls with
unknown callees. We now collect all indirectly callable functions and
limit the potentially called functions to those.

The AMDGPU backend is the only user for now. We should enable this for
AMDGPU (and NVIDIA GPUs in certain cases) also when we run the
Attributor (or OpenMP-opt) earlier in the pipeline.
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |    2 +-
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   |   33 +-
 .../GlobalISel/irtranslator-indirect-call.ll  |   36 +-
 .../annotate-kernel-features-hsa-call.ll      |   99 +-
 .../AMDGPU/attributor-loop-issue-58639.ll     |   65 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll    |   44 +-
 .../AMDGPU/duplicate-attribute-indirect.ll    |   32 +-
 .../enable-scratch-only-dynamic-stack.ll      |   26 +-
 llvm/test/CodeGen/AMDGPU/indirect-call.ll     | 4009 +++++++++--------
 .../AMDGPU/resource-optimization-remarks.ll   |   65 +-
 llvm/test/CodeGen/AMDGPU/sibling-call.ll      |    6 +-
 .../CodeGen/AMDGPU/simple-indirect-call.ll    |   48 +-
 12 files changed, 2513 insertions(+), 1952 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 9649cfd668b3a09..f266620b65ca1fe 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1435,7 +1435,7 @@ struct AttributorConfig {
   /// Callback function to determine if an indirect call targets should be made
   /// direct call targets (with an if-cascade).
   std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB,
-                     Function &AssummedCallee)>
+                     Function &AssummedCallee, unsigned NumCallees)>
       IndirectCalleeSpecializationCallback = nullptr;
 
   /// Helper to update an underlying call graph and to delete functions.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 57c873f00a4a195..fb203c9e4006426 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,11 +14,15 @@
 #include "GCNSubtarget.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO/Attributor.h"
+#include <optional>
 
 #define DEBUG_TYPE "amdgpu-attributor"
 
@@ -944,16 +948,29 @@ class AMDGPUAttributor : public ModulePass {
         {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
          &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
          &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
-         &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
+         &AAIndirectCallInfo::ID, &AAPotentialConstantValues::ID,
+         &AAUnderlyingObjects::ID});
 
     AttributorConfig AC(CGUpdater);
     AC.Allowed = &Allowed;
     AC.IsModulePass = true;
     AC.DefaultInitializeLiveInternals = false;
+    AC.IsClosedWorldModule = true;
     AC.IPOAmendableCB = [](const Function &F) {
       return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
     };
 
+    // Callback to determine if we should specialize an indirect call site with
+    // a specific callee. It's effectively a heuristic and we can add checks for
+    // the callee size, PGO, etc. For now, we check for single potential callees
+    // and kernel arguments as they are known uniform values.
+    AC.IndirectCalleeSpecializationCallback =
+        [&](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
+            Function &Callee, unsigned NumCallees) {
+          return indirectCalleeSpecializationCallback(A, AA, CB, Callee,
+                                                      NumCallees);
+        };
+
     Attributor A(Functions, InfoCache, AC);
 
     for (Function &F : M) {
@@ -975,6 +992,20 @@ class AMDGPUAttributor : public ModulePass {
     AU.addRequired<CycleInfoWrapperPass>();
   }
 
+  /// Helper to decide if we should specialize the indirect \p CB for \p Callee,
+  /// which is one of the \p NumCallees potential callees.
+  bool indirectCalleeSpecializationCallback(Attributor &A,
+                                            const AbstractAttribute &AA,
+                                            CallBase &CB, Function &Callee,
+                                            unsigned NumCallees) {
+    // Singleton functions should be specialized.
+    if (NumCallees == 1)
+      return true;
+    // Otherwise, specialize only if the called operand is always uniform.
+    const auto &TTI = TM->getTargetTransformInfo(*CB.getCaller());
+    return TTI.isAlwaysUniform(CB.getCalledOperand());
+  }
+
   StringRef getPassName() const override { return "AMDGPU Attributor"; }
   TargetMachine *TM;
   static char ID;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index 4eba84f61c2d8a3..0bb9a58c29ae718 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -attributor-assume-closed-world=false -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CHECK %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CWRLD %s
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
   ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
@@ -52,24 +53,31 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
   ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; CHECK-NEXT:   S_ENDPGM 0
+  ;
+  ; CWRLD-LABEL: name: test_indirect_call_sgpr_ptr
+  ; CWRLD: bb.1 (%ir-block.0):
+  ; CWRLD-NEXT:   liveins: $sgpr4_sgpr5
+  ; CWRLD-NEXT: {{  $}}
+  ; CWRLD-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; CWRLD-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   call void %fptr()
   ret void
 }
 
 define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) {
-  ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr
-  ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   SI_RETURN
+  ; SAMEC-LABEL: name: test_gfx_indirect_call_sgpr_ptr
+  ; SAMEC: bb.1 (%ir-block.0):
+  ; SAMEC-NEXT:   liveins: $vgpr0, $vgpr1
+  ; SAMEC-NEXT: {{  $}}
+  ; SAMEC-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; SAMEC-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; SAMEC-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; SAMEC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; SAMEC-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; SAMEC-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+  ; SAMEC-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; SAMEC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; SAMEC-NEXT:   SI_RETURN
   call amdgpu_gfx void %fptr()
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index c8ac5bcf8d22242..35affc7a8b140be 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_HSA %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor -attributor-assume-closed-world=false < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,OWRLD_ATTR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,CWRLD_ATTR_HSA %s
 
 ; TODO: The test contains UB which is refined by the Attributor and should be removed.
 
@@ -18,6 +19,16 @@ declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
 declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
 declare i64 @llvm.amdgcn.dispatch.id() #0
 
+@G1 = global ptr poison
+@G2 = global ptr poison
+
+;.
+; AKF_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr poison
+; AKF_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr poison
+;.
+; ATTRIBUTOR_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr poison
+; ATTRIBUTOR_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr poison
+;.
 define void @use_workitem_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
 ; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] {
@@ -766,19 +777,55 @@ define float @func_indirect_call(ptr %fptr) #3 {
 ; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] {
 ; AKF_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
 ; AKF_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; AKF_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; AKF_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
-; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
-; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
-; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
-; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
-; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
+; OWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
+; OWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
+; OWRLD_ATTR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
+; OWRLD_ATTR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; OWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; OWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
+; OWRLD_ATTR_HSA-NEXT:    ret float [[FADD]]
+;
+; CWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
+; CWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] {
+; CWRLD_ATTR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]](), !callees !0
+; CWRLD_ATTR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; CWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; CWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
+; CWRLD_ATTR_HSA-NEXT:    ret float [[FADD]]
 ;
   %f = call float %fptr()
   %fadd = fadd float %f, 1.0
+  store ptr @indirect_callee1, ptr @G1
+  store ptr @indirect_callee2, ptr @G2
   ret float %fadd
 }
 
+define float @indirect_callee1() {
+; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee1() {
+; AKF_HSA-NEXT:    ret float 0x40091EB860000000
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee1
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-NEXT:    ret float 0x40091EB860000000
+;
+  ret float 0x40091EB860000000
+}
+define float @indirect_callee2(float noundef %arg) {
+; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee2
+; AKF_HSA-SAME: (float noundef [[ARG:%.*]]) {
+; AKF_HSA-NEXT:    ret float [[ARG]]
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee2
+; ATTRIBUTOR_HSA-SAME: (float noundef [[ARG:%.*]]) #[[ATTR19]] {
+; ATTRIBUTOR_HSA-NEXT:    ret float [[ARG]]
+;
+  ret float %arg
+}
+
 declare float @extern() #3
 define float @func_extern_call() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call
@@ -845,7 +892,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -861,7 +908,7 @@ define void @func_sanitize_address() #4 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -877,7 +924,7 @@ define void @func_indirect_sanitize_address() #3 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -893,7 +940,7 @@ define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -928,7 +975,7 @@ define internal void @enqueue_block_def() #6 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -941,7 +988,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @enqueue_block_decl()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -956,7 +1003,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @enqueue_block_def()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -969,7 +1016,7 @@ define void @unused_enqueue_block() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -980,7 +1027,7 @@ define internal void @known_func() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -994,7 +1041,7 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @known_func() #[[ATTR29:[0-9]+]]
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -1040,15 +1087,17 @@ attributes #6 = { "enqueued-block" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR25:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR28]] = { nounwind }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR29]] = { "enqueued-block" }
 ;.
+; CWRLD_ATTR_HSA: [[META0:![0-9]+]] = !{ptr @indirect_callee1, ptr @indirect_callee2}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
index 60da0445927743a..0c489d0c443e29e 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor  -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=CHECK,OWRLD
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,CWRLD
 
 %0 = type { ptr, ptr }
 
@@ -20,19 +21,32 @@ bb:
 }
 
 define internal fastcc double @baz(ptr %arg) {
-; CHECK-LABEL: define {{[^@]+}}@baz
-; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call double [[TMP1]]()
-; CHECK-NEXT:    br label [[BB3:%.*]]
-; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
-; CHECK-NEXT:    br label [[BB5:%.*]]
-; CHECK:       bb5:
-; CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
-; CHECK-NEXT:    br label [[BB5]]
+; OWRLD-LABEL: define {{[^@]+}}@baz
+; OWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
+; OWRLD-NEXT:  bb:
+; OWRLD-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
+; OWRLD-NEXT:    [[TMP2:%.*]] = tail call double [[TMP1]]()
+; OWRLD-NEXT:    br label [[BB3:%.*]]
+; OWRLD:       bb3:
+; OWRLD-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
+; OWRLD-NEXT:    br label [[BB5:%.*]]
+; OWRLD:       bb5:
+; OWRLD-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+; OWRLD-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
+; OWRLD-NEXT:    br label [[BB5]]
+;
+; CWRLD-LABEL: define {{[^@]+}}@baz
+; CWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
+; CWRLD-NEXT:  bb:
+; CWRLD-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
+; CWRLD-NEXT:    unreachable
+; CWRLD:       bb3:
+; CWRLD-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
+; CWRLD-NEXT:    br label [[BB5:%.*]]
+; CWRLD:       bb5:
+; CWRLD-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CWRLD-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
+; CWRLD-NEXT:    br label [[BB5]]
 ;
 bb:
   %tmp1 = load ptr, ptr %arg, align 8
@@ -49,13 +63,19 @@ bb5:                                              ; preds = %bb5, %bb3
   br label %bb5
 }
 
-define amdgpu_kernel void @entry() {
-; CHECK-LABEL: define {{[^@]+}}@entry
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
-; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
-; CHECK-NEXT:    ret void
+define amdgpu_kernel void @entry() { ; OWRLD-LABEL: define {{[^@]+}}@entry
+; OWRLD-SAME: () #[[ATTR0]] {
+; OWRLD-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
+; OWRLD-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; OWRLD-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@entry
+; CWRLD-SAME: () #[[ATTR1:[0-9]+]] {
+; CWRLD-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
+; CWRLD-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CWRLD-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
+; CWRLD-NEXT:    ret void
 ;
   %alloca = alloca %0, align 8, addrspace(5)
   %cast = addrspacecast ptr addrspace(5) %alloca to ptr
@@ -63,5 +83,6 @@ define amdgpu_kernel void @entry() {
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index c20df45f10a2950..61ba5cc8c7b177d 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor  -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=CHECK,OWRLD
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,CWRLD
 
 define internal void @indirect() {
 ; CHECK-LABEL: define {{[^@]+}}@indirect
@@ -10,13 +11,21 @@ define internal void @indirect() {
 }
 
 define internal void @direct() {
-; CHECK-LABEL: define {{[^@]+}}@direct
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; CHECK-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
-; CHECK-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; CHECK-NEXT:    call void [[FP]]()
-; CHECK-NEXT:    ret void
+; OWRLD-LABEL: define {{[^@]+}}@direct
+; OWRLD-SAME: () #[[ATTR1:[0-9]+]] {
+; OWRLD-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; OWRLD-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; OWRLD-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; OWRLD-NEXT:    call void [[FP]]()
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@direct
+; CWRLD-SAME: () #[[ATTR0]] {
+; CWRLD-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; CWRLD-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; CWRLD-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; CWRLD-NEXT:    call void @indirect()
+; CWRLD-NEXT:    ret void
 ;
   %fptr = alloca ptr, addrspace(5)
   store ptr @indirect, ptr addrspace(5) %fptr
@@ -26,15 +35,22 @@ define internal void @direct() {
 }
 
 define amdgpu_kernel void @test_direct_indirect_call() {
-; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    call void @direct()
-; CHECK-NEXT:    ret void
+; OWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call
+; OWRLD-SAME: () #[[ATTR1]] {
+; OWRLD-NEXT:    call void @direct()
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call
+; CWRLD-SAME: () #[[ATTR0]] {
+; CWRLD-NEXT:    call void @direct()
+; CWRLD-NEXT:    ret void
 ;
   call void @direct()
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.
+; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index fde1ca5ce02b741..0b15632e204668f 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features  %s | FileCheck -check-prefix=AKF_GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor  -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_OWR
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_CWR
 
 define internal void @indirect() {
 ; AKF_GCN-LABEL: define {{[^@]+}}@indirect() {
@@ -22,13 +23,21 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
 ; AKF_GCN-NEXT:    call void [[FP]]()
 ; AKF_GCN-NEXT:    ret void
 ;
-; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
-; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT:    call void [[FP]]()
-; ATTRIBUTOR_GCN-NEXT:    ret void
+; ATTRIBUTOR_OWR-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_OWR-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_OWR-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_OWR-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_OWR-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_OWR-NEXT:    call void [[FP]]()
+; ATTRIBUTOR_OWR-NEXT:    ret void
+;
+; ATTRIBUTOR_CWR-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_CWR-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_CWR-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_CWR-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_CWR-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_CWR-NEXT:    call void @indirect()
+; ATTRIBUTOR_CWR-NEXT:    ret void
 ;
   %fptr = alloca ptr, addrspace(5)
   store ptr @indirect, ptr addrspace(5) %fptr
@@ -42,6 +51,9 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_OWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_OWR: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
+;.
+; ATTRIBUTOR_CWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CWR: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
index 22f90682aa97388..ca73d33edfc8641 100644
--- a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
+++ b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
@@ -1,18 +1,26 @@
-; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV5C %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV4C %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV5O %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV4O %s
 
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
-; No stack objects, only indirect call has to enable scrathch
-; GCN-LABEL: test_indirect_call:
+; No stack objects, only indirect call has to enable scratch
+; GCNO-LABEL: test_indirect_call:
+; GCNC-LABEL: test_indirect_call:
 
-; COV5: .amdhsa_private_segment_fixed_size 0{{$}}
-; COV4: .amdhsa_private_segment_fixed_size 16384{{$}}
+; COV5O: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV5C: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV4C: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV4O: .amdhsa_private_segment_fixed_size 16384{{$}}
 
-; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
+; GCNO: .amdhsa_user_sgpr_private_segment_buffer 1
+; GCNC: .amdhsa_user_sgpr_private_segment_buffer 1
 
-; COV5: .amdhsa_uses_dynamic_stack 1
-; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; COV5O: .amdhsa_uses_dynamic_stack 1
+; COV5C: .amdhsa_uses_dynamic_stack 0
+; GCNO: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; GCNC: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
 define amdgpu_kernel void @test_indirect_call() {
   %fptr = load ptr, ptr addrspace(4) @gv.fptr0
   call void %fptr()
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index 1eebc8e7953e3e4..a723b24b17d70ea 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -1,1109 +1,1443 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_O %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_C %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_O %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_C %s
 
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
-; GCN-LABEL: test_indirect_call_sgpr_ptr:
-; GCN:         .amd_kernel_code_t
-; GCN-NEXT:     amd_code_version_major = 1
-; GCN-NEXT:     amd_code_version_minor = 2
-; GCN-NEXT:     amd_machine_kind = 1
-; GCN-NEXT:     amd_machine_version_major = 7
-; GCN-NEXT:     amd_machine_version_minor = 0
-; GCN-NEXT:     amd_machine_version_stepping = 0
-; GCN-NEXT:     kernel_code_entry_byte_offset = 256
-; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
-; GCN-NEXT:     granulated_workitem_vgpr_count = 10
-; GCN-NEXT:     granulated_wavefront_sgpr_count = 8
-; GCN-NEXT:     priority = 0
-; GCN-NEXT:     float_mode = 240
-; GCN-NEXT:     priv = 0
-; GCN-NEXT:     enable_dx10_clamp = 1
-; GCN-NEXT:     debug_mode = 0
-; GCN-NEXT:     enable_ieee_mode = 1
-; GCN-NEXT:     enable_wgp_mode = 0
-; GCN-NEXT:     enable_mem_ordered = 0
-; GCN-NEXT:     enable_fwd_progress = 0
-; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GCN-NEXT:     user_sgpr_count = 14
-; GCN-NEXT:     enable_trap_handler = 0
-; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
-; GCN-NEXT:     enable_sgpr_workgroup_info = 0
-; GCN-NEXT:     enable_vgpr_workitem_id = 2
-; GCN-NEXT:     enable_exception_msb = 0
-; GCN-NEXT:     granulated_lds_size = 0
-; GCN-NEXT:     enable_exception = 0
-; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GCN-NEXT:     enable_sgpr_queue_ptr = 1
-; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GCN-NEXT:     enable_sgpr_dispatch_id = 1
-; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
-; GCN-NEXT:     enable_sgpr_private_segment_size = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
-; GCN-NEXT:     enable_wavefront_size32 = 0
-; GCN-NEXT:     enable_ordered_append_gds = 0
-; GCN-NEXT:     private_element_size = 1
-; GCN-NEXT:     is_ptr64 = 1
-; GCN-NEXT:     is_dynamic_callstack = 1
-; GCN-NEXT:     is_debug_enabled = 0
-; GCN-NEXT:     is_xnack_enabled = 0
-; GCN-NEXT:     workitem_private_segment_byte_size = 16384
-; GCN-NEXT:     workgroup_group_segment_byte_size = 0
-; GCN-NEXT:     gds_segment_byte_size = 0
-; GCN-NEXT:     kernarg_segment_byte_size = 64
-; GCN-NEXT:     workgroup_fbarrier_count = 0
-; GCN-NEXT:     wavefront_sgpr_count = 68
-; GCN-NEXT:     workitem_vgpr_count = 42
-; GCN-NEXT:     reserved_vgpr_first = 0
-; GCN-NEXT:     reserved_vgpr_count = 0
-; GCN-NEXT:     reserved_sgpr_first = 0
-; GCN-NEXT:     reserved_sgpr_count = 0
-; GCN-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
-; GCN-NEXT:     debug_private_segment_buffer_sgpr = 0
-; GCN-NEXT:     kernarg_segment_alignment = 4
-; GCN-NEXT:     group_segment_alignment = 4
-; GCN-NEXT:     private_segment_alignment = 4
-; GCN-NEXT:     wavefront_size = 6
-; GCN-NEXT:     call_convention = -1
-; GCN-NEXT:     runtime_loader_kernel_symbol = 0
-; GCN-NEXT:    .end_amd_kernel_code_t
-; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GCN-NEXT:    s_add_i32 s12, s12, s17
-; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GCN-NEXT:    s_add_u32 s0, s0, s17
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_mov_b32 s13, s15
-; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_getpc_b64 s[14:15]
-; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GCN-NEXT:    s_add_u32 s8, s8, 8
-; GCN-NEXT:    s_addc_u32 s9, s9, 0
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
-; GCN-NEXT:    s_mov_b32 s14, s16
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GCN-NEXT:    s_endpgm
+; GCN_O-LABEL: test_indirect_call_sgpr_ptr:
+; GCN_O:         .amd_kernel_code_t
+; GCN_O-NEXT:     amd_code_version_major = 1
+; GCN_O-NEXT:     amd_code_version_minor = 2
+; GCN_O-NEXT:     amd_machine_kind = 1
+; GCN_O-NEXT:     amd_machine_version_major = 7
+; GCN_O-NEXT:     amd_machine_version_minor = 0
+; GCN_O-NEXT:     amd_machine_version_stepping = 0
+; GCN_O-NEXT:     kernel_code_entry_byte_offset = 256
+; GCN_O-NEXT:     kernel_code_prefetch_byte_size = 0
+; GCN_O-NEXT:     granulated_workitem_vgpr_count = 10
+; GCN_O-NEXT:     granulated_wavefront_sgpr_count = 8
+; GCN_O-NEXT:     priority = 0
+; GCN_O-NEXT:     float_mode = 240
+; GCN_O-NEXT:     priv = 0
+; GCN_O-NEXT:     enable_dx10_clamp = 1
+; GCN_O-NEXT:     debug_mode = 0
+; GCN_O-NEXT:     enable_ieee_mode = 1
+; GCN_O-NEXT:     enable_wgp_mode = 0
+; GCN_O-NEXT:     enable_mem_ordered = 0
+; GCN_O-NEXT:     enable_fwd_progress = 0
+; GCN_O-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GCN_O-NEXT:     user_sgpr_count = 14
+; GCN_O-NEXT:     enable_trap_handler = 0
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_y = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_info = 0
+; GCN_O-NEXT:     enable_vgpr_workitem_id = 2
+; GCN_O-NEXT:     enable_exception_msb = 0
+; GCN_O-NEXT:     granulated_lds_size = 0
+; GCN_O-NEXT:     enable_exception = 0
+; GCN_O-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GCN_O-NEXT:     enable_sgpr_dispatch_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_queue_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_dispatch_id = 1
+; GCN_O-NEXT:     enable_sgpr_flat_scratch_init = 1
+; GCN_O-NEXT:     enable_sgpr_private_segment_size = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GCN_O-NEXT:     enable_wavefront_size32 = 0
+; GCN_O-NEXT:     enable_ordered_append_gds = 0
+; GCN_O-NEXT:     private_element_size = 1
+; GCN_O-NEXT:     is_ptr64 = 1
+; GCN_O-NEXT:     is_dynamic_callstack = 1
+; GCN_O-NEXT:     is_debug_enabled = 0
+; GCN_O-NEXT:     is_xnack_enabled = 0
+; GCN_O-NEXT:     workitem_private_segment_byte_size = 16384
+; GCN_O-NEXT:     workgroup_group_segment_byte_size = 0
+; GCN_O-NEXT:     gds_segment_byte_size = 0
+; GCN_O-NEXT:     kernarg_segment_byte_size = 64
+; GCN_O-NEXT:     workgroup_fbarrier_count = 0
+; GCN_O-NEXT:     wavefront_sgpr_count = 68
+; GCN_O-NEXT:     workitem_vgpr_count = 42
+; GCN_O-NEXT:     reserved_vgpr_first = 0
+; GCN_O-NEXT:     reserved_vgpr_count = 0
+; GCN_O-NEXT:     reserved_sgpr_first = 0
+; GCN_O-NEXT:     reserved_sgpr_count = 0
+; GCN_O-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GCN_O-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GCN_O-NEXT:     kernarg_segment_alignment = 4
+; GCN_O-NEXT:     group_segment_alignment = 4
+; GCN_O-NEXT:     private_segment_alignment = 4
+; GCN_O-NEXT:     wavefront_size = 6
+; GCN_O-NEXT:     call_convention = -1
+; GCN_O-NEXT:     runtime_loader_kernel_symbol = 0
+; GCN_O-NEXT:    .end_amd_kernel_code_t
+; GCN_O-NEXT:  ; %bb.0:
+; GCN_O-NEXT:    s_mov_b32 s32, 0
+; GCN_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GCN_O-NEXT:    s_add_i32 s12, s12, s17
+; GCN_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN_O-NEXT:    s_add_u32 s0, s0, s17
+; GCN_O-NEXT:    s_addc_u32 s1, s1, 0
+; GCN_O-NEXT:    s_mov_b32 s13, s15
+; GCN_O-NEXT:    s_mov_b32 s12, s14
+; GCN_O-NEXT:    s_getpc_b64 s[14:15]
+; GCN_O-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
+; GCN_O-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GCN_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GCN_O-NEXT:    s_add_u32 s8, s8, 8
+; GCN_O-NEXT:    s_addc_u32 s9, s9, 0
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GCN_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GCN_O-NEXT:    s_mov_b32 s14, s16
+; GCN_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN_O-NEXT:    s_endpgm
 ;
-; GISEL-LABEL: test_indirect_call_sgpr_ptr:
-; GISEL:         .amd_kernel_code_t
-; GISEL-NEXT:     amd_code_version_major = 1
-; GISEL-NEXT:     amd_code_version_minor = 2
-; GISEL-NEXT:     amd_machine_kind = 1
-; GISEL-NEXT:     amd_machine_version_major = 7
-; GISEL-NEXT:     amd_machine_version_minor = 0
-; GISEL-NEXT:     amd_machine_version_stepping = 0
-; GISEL-NEXT:     kernel_code_entry_byte_offset = 256
-; GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
-; GISEL-NEXT:     granulated_workitem_vgpr_count = 10
-; GISEL-NEXT:     granulated_wavefront_sgpr_count = 8
-; GISEL-NEXT:     priority = 0
-; GISEL-NEXT:     float_mode = 240
-; GISEL-NEXT:     priv = 0
-; GISEL-NEXT:     enable_dx10_clamp = 1
-; GISEL-NEXT:     debug_mode = 0
-; GISEL-NEXT:     enable_ieee_mode = 1
-; GISEL-NEXT:     enable_wgp_mode = 0
-; GISEL-NEXT:     enable_mem_ordered = 0
-; GISEL-NEXT:     enable_fwd_progress = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GISEL-NEXT:     user_sgpr_count = 14
-; GISEL-NEXT:     enable_trap_handler = 0
-; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
-; GISEL-NEXT:     enable_vgpr_workitem_id = 2
-; GISEL-NEXT:     enable_exception_msb = 0
-; GISEL-NEXT:     granulated_lds_size = 0
-; GISEL-NEXT:     enable_exception = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
-; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
-; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
-; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
-; GISEL-NEXT:     enable_wavefront_size32 = 0
-; GISEL-NEXT:     enable_ordered_append_gds = 0
-; GISEL-NEXT:     private_element_size = 1
-; GISEL-NEXT:     is_ptr64 = 1
-; GISEL-NEXT:     is_dynamic_callstack = 1
-; GISEL-NEXT:     is_debug_enabled = 0
-; GISEL-NEXT:     is_xnack_enabled = 0
-; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
-; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
-; GISEL-NEXT:     gds_segment_byte_size = 0
-; GISEL-NEXT:     kernarg_segment_byte_size = 64
-; GISEL-NEXT:     workgroup_fbarrier_count = 0
-; GISEL-NEXT:     wavefront_sgpr_count = 68
-; GISEL-NEXT:     workitem_vgpr_count = 42
-; GISEL-NEXT:     reserved_vgpr_first = 0
-; GISEL-NEXT:     reserved_vgpr_count = 0
-; GISEL-NEXT:     reserved_sgpr_first = 0
-; GISEL-NEXT:     reserved_sgpr_count = 0
-; GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
-; GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
-; GISEL-NEXT:     kernarg_segment_alignment = 4
-; GISEL-NEXT:     group_segment_alignment = 4
-; GISEL-NEXT:     private_segment_alignment = 4
-; GISEL-NEXT:     wavefront_size = 6
-; GISEL-NEXT:     call_convention = -1
-; GISEL-NEXT:     runtime_loader_kernel_symbol = 0
-; GISEL-NEXT:    .end_amd_kernel_code_t
-; GISEL-NEXT:  ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_add_i32 s12, s12, s17
-; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GISEL-NEXT:    s_add_u32 s0, s0, s17
-; GISEL-NEXT:    s_addc_u32 s1, s1, 0
-; GISEL-NEXT:    s_mov_b32 s13, s15
-; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_getpc_b64 s[14:15]
-; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
-; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
-; GISEL-NEXT:    s_mov_b32 s14, s16
-; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GISEL-NEXT:    s_endpgm
+; GCN_C-LABEL: test_indirect_call_sgpr_ptr:
+; GCN_C:         .amd_kernel_code_t
+; GCN_C-NEXT:     amd_code_version_major = 1
+; GCN_C-NEXT:     amd_code_version_minor = 2
+; GCN_C-NEXT:     amd_machine_kind = 1
+; GCN_C-NEXT:     amd_machine_version_major = 7
+; GCN_C-NEXT:     amd_machine_version_minor = 0
+; GCN_C-NEXT:     amd_machine_version_stepping = 0
+; GCN_C-NEXT:     kernel_code_entry_byte_offset = 256
+; GCN_C-NEXT:     kernel_code_prefetch_byte_size = 0
+; GCN_C-NEXT:     granulated_workitem_vgpr_count = 0
+; GCN_C-NEXT:     granulated_wavefront_sgpr_count = 0
+; GCN_C-NEXT:     priority = 0
+; GCN_C-NEXT:     float_mode = 240
+; GCN_C-NEXT:     priv = 0
+; GCN_C-NEXT:     enable_dx10_clamp = 1
+; GCN_C-NEXT:     debug_mode = 0
+; GCN_C-NEXT:     enable_ieee_mode = 1
+; GCN_C-NEXT:     enable_wgp_mode = 0
+; GCN_C-NEXT:     enable_mem_ordered = 0
+; GCN_C-NEXT:     enable_fwd_progress = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; GCN_C-NEXT:     user_sgpr_count = 6
+; GCN_C-NEXT:     enable_trap_handler = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_z = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_info = 0
+; GCN_C-NEXT:     enable_vgpr_workitem_id = 0
+; GCN_C-NEXT:     enable_exception_msb = 0
+; GCN_C-NEXT:     granulated_lds_size = 0
+; GCN_C-NEXT:     enable_exception = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GCN_C-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GCN_C-NEXT:     enable_sgpr_queue_ptr = 0
+; GCN_C-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GCN_C-NEXT:     enable_sgpr_dispatch_id = 0
+; GCN_C-NEXT:     enable_sgpr_flat_scratch_init = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_size = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GCN_C-NEXT:     enable_wavefront_size32 = 0
+; GCN_C-NEXT:     enable_ordered_append_gds = 0
+; GCN_C-NEXT:     private_element_size = 1
+; GCN_C-NEXT:     is_ptr64 = 1
+; GCN_C-NEXT:     is_dynamic_callstack = 0
+; GCN_C-NEXT:     is_debug_enabled = 0
+; GCN_C-NEXT:     is_xnack_enabled = 0
+; GCN_C-NEXT:     workitem_private_segment_byte_size = 0
+; GCN_C-NEXT:     workgroup_group_segment_byte_size = 0
+; GCN_C-NEXT:     gds_segment_byte_size = 0
+; GCN_C-NEXT:     kernarg_segment_byte_size = 4
+; GCN_C-NEXT:     workgroup_fbarrier_count = 0
+; GCN_C-NEXT:     wavefront_sgpr_count = 0
+; GCN_C-NEXT:     workitem_vgpr_count = 0
+; GCN_C-NEXT:     reserved_vgpr_first = 0
+; GCN_C-NEXT:     reserved_vgpr_count = 0
+; GCN_C-NEXT:     reserved_sgpr_first = 0
+; GCN_C-NEXT:     reserved_sgpr_count = 0
+; GCN_C-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GCN_C-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GCN_C-NEXT:     kernarg_segment_alignment = 4
+; GCN_C-NEXT:     group_segment_alignment = 4
+; GCN_C-NEXT:     private_segment_alignment = 4
+; GCN_C-NEXT:     wavefront_size = 6
+; GCN_C-NEXT:     call_convention = -1
+; GCN_C-NEXT:     runtime_loader_kernel_symbol = 0
+; GCN_C-NEXT:    .end_amd_kernel_code_t
+; GCN_C-NEXT:  ; %bb.0:
+;
+; GISEL_O-LABEL: test_indirect_call_sgpr_ptr:
+; GISEL_O:         .amd_kernel_code_t
+; GISEL_O-NEXT:     amd_code_version_major = 1
+; GISEL_O-NEXT:     amd_code_version_minor = 2
+; GISEL_O-NEXT:     amd_machine_kind = 1
+; GISEL_O-NEXT:     amd_machine_version_major = 7
+; GISEL_O-NEXT:     amd_machine_version_minor = 0
+; GISEL_O-NEXT:     amd_machine_version_stepping = 0
+; GISEL_O-NEXT:     kernel_code_entry_byte_offset = 256
+; GISEL_O-NEXT:     kernel_code_prefetch_byte_size = 0
+; GISEL_O-NEXT:     granulated_workitem_vgpr_count = 10
+; GISEL_O-NEXT:     granulated_wavefront_sgpr_count = 8
+; GISEL_O-NEXT:     priority = 0
+; GISEL_O-NEXT:     float_mode = 240
+; GISEL_O-NEXT:     priv = 0
+; GISEL_O-NEXT:     enable_dx10_clamp = 1
+; GISEL_O-NEXT:     debug_mode = 0
+; GISEL_O-NEXT:     enable_ieee_mode = 1
+; GISEL_O-NEXT:     enable_wgp_mode = 0
+; GISEL_O-NEXT:     enable_mem_ordered = 0
+; GISEL_O-NEXT:     enable_fwd_progress = 0
+; GISEL_O-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GISEL_O-NEXT:     user_sgpr_count = 14
+; GISEL_O-NEXT:     enable_trap_handler = 0
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_y = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_info = 0
+; GISEL_O-NEXT:     enable_vgpr_workitem_id = 2
+; GISEL_O-NEXT:     enable_exception_msb = 0
+; GISEL_O-NEXT:     granulated_lds_size = 0
+; GISEL_O-NEXT:     enable_exception = 0
+; GISEL_O-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GISEL_O-NEXT:     enable_sgpr_dispatch_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_queue_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_dispatch_id = 1
+; GISEL_O-NEXT:     enable_sgpr_flat_scratch_init = 1
+; GISEL_O-NEXT:     enable_sgpr_private_segment_size = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GISEL_O-NEXT:     enable_wavefront_size32 = 0
+; GISEL_O-NEXT:     enable_ordered_append_gds = 0
+; GISEL_O-NEXT:     private_element_size = 1
+; GISEL_O-NEXT:     is_ptr64 = 1
+; GISEL_O-NEXT:     is_dynamic_callstack = 1
+; GISEL_O-NEXT:     is_debug_enabled = 0
+; GISEL_O-NEXT:     is_xnack_enabled = 0
+; GISEL_O-NEXT:     workitem_private_segment_byte_size = 16384
+; GISEL_O-NEXT:     workgroup_group_segment_byte_size = 0
+; GISEL_O-NEXT:     gds_segment_byte_size = 0
+; GISEL_O-NEXT:     kernarg_segment_byte_size = 64
+; GISEL_O-NEXT:     workgroup_fbarrier_count = 0
+; GISEL_O-NEXT:     wavefront_sgpr_count = 68
+; GISEL_O-NEXT:     workitem_vgpr_count = 42
+; GISEL_O-NEXT:     reserved_vgpr_first = 0
+; GISEL_O-NEXT:     reserved_vgpr_count = 0
+; GISEL_O-NEXT:     reserved_sgpr_first = 0
+; GISEL_O-NEXT:     reserved_sgpr_count = 0
+; GISEL_O-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL_O-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GISEL_O-NEXT:     kernarg_segment_alignment = 4
+; GISEL_O-NEXT:     group_segment_alignment = 4
+; GISEL_O-NEXT:     private_segment_alignment = 4
+; GISEL_O-NEXT:     wavefront_size = 6
+; GISEL_O-NEXT:     call_convention = -1
+; GISEL_O-NEXT:     runtime_loader_kernel_symbol = 0
+; GISEL_O-NEXT:    .end_amd_kernel_code_t
+; GISEL_O-NEXT:  ; %bb.0:
+; GISEL_O-NEXT:    s_mov_b32 s32, 0
+; GISEL_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL_O-NEXT:    s_add_i32 s12, s12, s17
+; GISEL_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL_O-NEXT:    s_add_u32 s0, s0, s17
+; GISEL_O-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL_O-NEXT:    s_mov_b32 s13, s15
+; GISEL_O-NEXT:    s_mov_b32 s12, s14
+; GISEL_O-NEXT:    s_getpc_b64 s[14:15]
+; GISEL_O-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
+; GISEL_O-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL_O-NEXT:    s_add_u32 s8, s8, 8
+; GISEL_O-NEXT:    s_addc_u32 s9, s9, 0
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
+; GISEL_O-NEXT:    v_or_b32_e32 v31, v0, v1
+; GISEL_O-NEXT:    s_mov_b32 s14, s16
+; GISEL_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL_O-NEXT:    s_endpgm
+;
+; GISEL_C-LABEL: test_indirect_call_sgpr_ptr:
+; GISEL_C:         .amd_kernel_code_t
+; GISEL_C-NEXT:     amd_code_version_major = 1
+; GISEL_C-NEXT:     amd_code_version_minor = 2
+; GISEL_C-NEXT:     amd_machine_kind = 1
+; GISEL_C-NEXT:     amd_machine_version_major = 7
+; GISEL_C-NEXT:     amd_machine_version_minor = 0
+; GISEL_C-NEXT:     amd_machine_version_stepping = 0
+; GISEL_C-NEXT:     kernel_code_entry_byte_offset = 256
+; GISEL_C-NEXT:     kernel_code_prefetch_byte_size = 0
+; GISEL_C-NEXT:     granulated_workitem_vgpr_count = 0
+; GISEL_C-NEXT:     granulated_wavefront_sgpr_count = 0
+; GISEL_C-NEXT:     priority = 0
+; GISEL_C-NEXT:     float_mode = 240
+; GISEL_C-NEXT:     priv = 0
+; GISEL_C-NEXT:     enable_dx10_clamp = 1
+; GISEL_C-NEXT:     debug_mode = 0
+; GISEL_C-NEXT:     enable_ieee_mode = 1
+; GISEL_C-NEXT:     enable_wgp_mode = 0
+; GISEL_C-NEXT:     enable_mem_ordered = 0
+; GISEL_C-NEXT:     enable_fwd_progress = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; GISEL_C-NEXT:     user_sgpr_count = 6
+; GISEL_C-NEXT:     enable_trap_handler = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_z = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_info = 0
+; GISEL_C-NEXT:     enable_vgpr_workitem_id = 0
+; GISEL_C-NEXT:     enable_exception_msb = 0
+; GISEL_C-NEXT:     granulated_lds_size = 0
+; GISEL_C-NEXT:     enable_exception = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GISEL_C-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GISEL_C-NEXT:     enable_sgpr_queue_ptr = 0
+; GISEL_C-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GISEL_C-NEXT:     enable_sgpr_dispatch_id = 0
+; GISEL_C-NEXT:     enable_sgpr_flat_scratch_init = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_size = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GISEL_C-NEXT:     enable_wavefront_size32 = 0
+; GISEL_C-NEXT:     enable_ordered_append_gds = 0
+; GISEL_C-NEXT:     private_element_size = 1
+; GISEL_C-NEXT:     is_ptr64 = 1
+; GISEL_C-NEXT:     is_dynamic_callstack = 0
+; GISEL_C-NEXT:     is_debug_enabled = 0
+; GISEL_C-NEXT:     is_xnack_enabled = 0
+; GISEL_C-NEXT:     workitem_private_segment_byte_size = 0
+; GISEL_C-NEXT:     workgroup_group_segment_byte_size = 0
+; GISEL_C-NEXT:     gds_segment_byte_size = 0
+; GISEL_C-NEXT:     kernarg_segment_byte_size = 4
+; GISEL_C-NEXT:     workgroup_fbarrier_count = 0
+; GISEL_C-NEXT:     wavefront_sgpr_count = 0
+; GISEL_C-NEXT:     workitem_vgpr_count = 0
+; GISEL_C-NEXT:     reserved_vgpr_first = 0
+; GISEL_C-NEXT:     reserved_vgpr_count = 0
+; GISEL_C-NEXT:     reserved_sgpr_first = 0
+; GISEL_C-NEXT:     reserved_sgpr_count = 0
+; GISEL_C-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL_C-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GISEL_C-NEXT:     kernarg_segment_alignment = 4
+; GISEL_C-NEXT:     group_segment_alignment = 4
+; GISEL_C-NEXT:     private_segment_alignment = 4
+; GISEL_C-NEXT:     wavefront_size = 6
+; GISEL_C-NEXT:     call_convention = -1
+; GISEL_C-NEXT:     runtime_loader_kernel_symbol = 0
+; GISEL_C-NEXT:    .end_amd_kernel_code_t
+; GISEL_C-NEXT:  ; %bb.0:
   %fptr = load ptr, ptr addrspace(4) @gv.fptr0
   call void %fptr()
   ret void
 }
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
-; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
-; GCN:         .amd_kernel_code_t
-; GCN-NEXT:     amd_code_version_major = 1
-; GCN-NEXT:     amd_code_version_minor = 2
-; GCN-NEXT:     amd_machine_kind = 1
-; GCN-NEXT:     amd_machine_version_major = 7
-; GCN-NEXT:     amd_machine_version_minor = 0
-; GCN-NEXT:     amd_machine_version_stepping = 0
-; GCN-NEXT:     kernel_code_entry_byte_offset = 256
-; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
-; GCN-NEXT:     granulated_workitem_vgpr_count = 10
-; GCN-NEXT:     granulated_wavefront_sgpr_count = 8
-; GCN-NEXT:     priority = 0
-; GCN-NEXT:     float_mode = 240
-; GCN-NEXT:     priv = 0
-; GCN-NEXT:     enable_dx10_clamp = 1
-; GCN-NEXT:     debug_mode = 0
-; GCN-NEXT:     enable_ieee_mode = 1
-; GCN-NEXT:     enable_wgp_mode = 0
-; GCN-NEXT:     enable_mem_ordered = 0
-; GCN-NEXT:     enable_fwd_progress = 0
-; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GCN-NEXT:     user_sgpr_count = 14
-; GCN-NEXT:     enable_trap_handler = 0
-; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
-; GCN-NEXT:     enable_sgpr_workgroup_info = 0
-; GCN-NEXT:     enable_vgpr_workitem_id = 2
-; GCN-NEXT:     enable_exception_msb = 0
-; GCN-NEXT:     granulated_lds_size = 0
-; GCN-NEXT:     enable_exception = 0
-; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GCN-NEXT:     enable_sgpr_queue_ptr = 1
-; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GCN-NEXT:     enable_sgpr_dispatch_id = 1
-; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
-; GCN-NEXT:     enable_sgpr_private_segment_size = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
-; GCN-NEXT:     enable_wavefront_size32 = 0
-; GCN-NEXT:     enable_ordered_append_gds = 0
-; GCN-NEXT:     private_element_size = 1
-; GCN-NEXT:     is_ptr64 = 1
-; GCN-NEXT:     is_dynamic_callstack = 1
-; GCN-NEXT:     is_debug_enabled = 0
-; GCN-NEXT:     is_xnack_enabled = 0
-; GCN-NEXT:     workitem_private_segment_byte_size = 16384
-; GCN-NEXT:     workgroup_group_segment_byte_size = 0
-; GCN-NEXT:     gds_segment_byte_size = 0
-; GCN-NEXT:     kernarg_segment_byte_size = 64
-; GCN-NEXT:     workgroup_fbarrier_count = 0
-; GCN-NEXT:     wavefront_sgpr_count = 68
-; GCN-NEXT:     workitem_vgpr_count = 42
-; GCN-NEXT:     reserved_vgpr_first = 0
-; GCN-NEXT:     reserved_vgpr_count = 0
-; GCN-NEXT:     reserved_sgpr_first = 0
-; GCN-NEXT:     reserved_sgpr_count = 0
-; GCN-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
-; GCN-NEXT:     debug_private_segment_buffer_sgpr = 0
-; GCN-NEXT:     kernarg_segment_alignment = 4
-; GCN-NEXT:     group_segment_alignment = 4
-; GCN-NEXT:     private_segment_alignment = 4
-; GCN-NEXT:     wavefront_size = 6
-; GCN-NEXT:     call_convention = -1
-; GCN-NEXT:     runtime_loader_kernel_symbol = 0
-; GCN-NEXT:    .end_amd_kernel_code_t
-; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GCN-NEXT:    s_add_i32 s12, s12, s17
-; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GCN-NEXT:    s_add_u32 s0, s0, s17
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_mov_b32 s13, s15
-; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_getpc_b64 s[14:15]
-; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GCN-NEXT:    s_add_u32 s8, s8, 8
-; GCN-NEXT:    s_addc_u32 s9, s9, 0
-; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
-; GCN-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GCN-NEXT:    s_mov_b32 s14, s16
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GCN-NEXT:    s_endpgm
+; GCN_O-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GCN_O:         .amd_kernel_code_t
+; GCN_O-NEXT:     amd_code_version_major = 1
+; GCN_O-NEXT:     amd_code_version_minor = 2
+; GCN_O-NEXT:     amd_machine_kind = 1
+; GCN_O-NEXT:     amd_machine_version_major = 7
+; GCN_O-NEXT:     amd_machine_version_minor = 0
+; GCN_O-NEXT:     amd_machine_version_stepping = 0
+; GCN_O-NEXT:     kernel_code_entry_byte_offset = 256
+; GCN_O-NEXT:     kernel_code_prefetch_byte_size = 0
+; GCN_O-NEXT:     granulated_workitem_vgpr_count = 10
+; GCN_O-NEXT:     granulated_wavefront_sgpr_count = 8
+; GCN_O-NEXT:     priority = 0
+; GCN_O-NEXT:     float_mode = 240
+; GCN_O-NEXT:     priv = 0
+; GCN_O-NEXT:     enable_dx10_clamp = 1
+; GCN_O-NEXT:     debug_mode = 0
+; GCN_O-NEXT:     enable_ieee_mode = 1
+; GCN_O-NEXT:     enable_wgp_mode = 0
+; GCN_O-NEXT:     enable_mem_ordered = 0
+; GCN_O-NEXT:     enable_fwd_progress = 0
+; GCN_O-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GCN_O-NEXT:     user_sgpr_count = 14
+; GCN_O-NEXT:     enable_trap_handler = 0
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_y = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_info = 0
+; GCN_O-NEXT:     enable_vgpr_workitem_id = 2
+; GCN_O-NEXT:     enable_exception_msb = 0
+; GCN_O-NEXT:     granulated_lds_size = 0
+; GCN_O-NEXT:     enable_exception = 0
+; GCN_O-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GCN_O-NEXT:     enable_sgpr_dispatch_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_queue_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_dispatch_id = 1
+; GCN_O-NEXT:     enable_sgpr_flat_scratch_init = 1
+; GCN_O-NEXT:     enable_sgpr_private_segment_size = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GCN_O-NEXT:     enable_wavefront_size32 = 0
+; GCN_O-NEXT:     enable_ordered_append_gds = 0
+; GCN_O-NEXT:     private_element_size = 1
+; GCN_O-NEXT:     is_ptr64 = 1
+; GCN_O-NEXT:     is_dynamic_callstack = 1
+; GCN_O-NEXT:     is_debug_enabled = 0
+; GCN_O-NEXT:     is_xnack_enabled = 0
+; GCN_O-NEXT:     workitem_private_segment_byte_size = 16384
+; GCN_O-NEXT:     workgroup_group_segment_byte_size = 0
+; GCN_O-NEXT:     gds_segment_byte_size = 0
+; GCN_O-NEXT:     kernarg_segment_byte_size = 64
+; GCN_O-NEXT:     workgroup_fbarrier_count = 0
+; GCN_O-NEXT:     wavefront_sgpr_count = 68
+; GCN_O-NEXT:     workitem_vgpr_count = 42
+; GCN_O-NEXT:     reserved_vgpr_first = 0
+; GCN_O-NEXT:     reserved_vgpr_count = 0
+; GCN_O-NEXT:     reserved_sgpr_first = 0
+; GCN_O-NEXT:     reserved_sgpr_count = 0
+; GCN_O-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GCN_O-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GCN_O-NEXT:     kernarg_segment_alignment = 4
+; GCN_O-NEXT:     group_segment_alignment = 4
+; GCN_O-NEXT:     private_segment_alignment = 4
+; GCN_O-NEXT:     wavefront_size = 6
+; GCN_O-NEXT:     call_convention = -1
+; GCN_O-NEXT:     runtime_loader_kernel_symbol = 0
+; GCN_O-NEXT:    .end_amd_kernel_code_t
+; GCN_O-NEXT:  ; %bb.0:
+; GCN_O-NEXT:    s_mov_b32 s32, 0
+; GCN_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GCN_O-NEXT:    s_add_i32 s12, s12, s17
+; GCN_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN_O-NEXT:    s_add_u32 s0, s0, s17
+; GCN_O-NEXT:    s_addc_u32 s1, s1, 0
+; GCN_O-NEXT:    s_mov_b32 s13, s15
+; GCN_O-NEXT:    s_mov_b32 s12, s14
+; GCN_O-NEXT:    s_getpc_b64 s[14:15]
+; GCN_O-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
+; GCN_O-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GCN_O-NEXT:    s_add_u32 s8, s8, 8
+; GCN_O-NEXT:    s_addc_u32 s9, s9, 0
+; GCN_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GCN_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GCN_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GCN_O-NEXT:    s_mov_b32 s14, s16
+; GCN_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN_O-NEXT:    s_endpgm
+;
+; GCN_C-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GCN_C:         .amd_kernel_code_t
+; GCN_C-NEXT:     amd_code_version_major = 1
+; GCN_C-NEXT:     amd_code_version_minor = 2
+; GCN_C-NEXT:     amd_machine_kind = 1
+; GCN_C-NEXT:     amd_machine_version_major = 7
+; GCN_C-NEXT:     amd_machine_version_minor = 0
+; GCN_C-NEXT:     amd_machine_version_stepping = 0
+; GCN_C-NEXT:     kernel_code_entry_byte_offset = 256
+; GCN_C-NEXT:     kernel_code_prefetch_byte_size = 0
+; GCN_C-NEXT:     granulated_workitem_vgpr_count = 0
+; GCN_C-NEXT:     granulated_wavefront_sgpr_count = 0
+; GCN_C-NEXT:     priority = 0
+; GCN_C-NEXT:     float_mode = 240
+; GCN_C-NEXT:     priv = 0
+; GCN_C-NEXT:     enable_dx10_clamp = 1
+; GCN_C-NEXT:     debug_mode = 0
+; GCN_C-NEXT:     enable_ieee_mode = 1
+; GCN_C-NEXT:     enable_wgp_mode = 0
+; GCN_C-NEXT:     enable_mem_ordered = 0
+; GCN_C-NEXT:     enable_fwd_progress = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; GCN_C-NEXT:     user_sgpr_count = 6
+; GCN_C-NEXT:     enable_trap_handler = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_z = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_info = 0
+; GCN_C-NEXT:     enable_vgpr_workitem_id = 0
+; GCN_C-NEXT:     enable_exception_msb = 0
+; GCN_C-NEXT:     granulated_lds_size = 0
+; GCN_C-NEXT:     enable_exception = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GCN_C-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GCN_C-NEXT:     enable_sgpr_queue_ptr = 0
+; GCN_C-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GCN_C-NEXT:     enable_sgpr_dispatch_id = 0
+; GCN_C-NEXT:     enable_sgpr_flat_scratch_init = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_size = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GCN_C-NEXT:     enable_wavefront_size32 = 0
+; GCN_C-NEXT:     enable_ordered_append_gds = 0
+; GCN_C-NEXT:     private_element_size = 1
+; GCN_C-NEXT:     is_ptr64 = 1
+; GCN_C-NEXT:     is_dynamic_callstack = 0
+; GCN_C-NEXT:     is_debug_enabled = 0
+; GCN_C-NEXT:     is_xnack_enabled = 0
+; GCN_C-NEXT:     workitem_private_segment_byte_size = 0
+; GCN_C-NEXT:     workgroup_group_segment_byte_size = 0
+; GCN_C-NEXT:     gds_segment_byte_size = 0
+; GCN_C-NEXT:     kernarg_segment_byte_size = 4
+; GCN_C-NEXT:     workgroup_fbarrier_count = 0
+; GCN_C-NEXT:     wavefront_sgpr_count = 0
+; GCN_C-NEXT:     workitem_vgpr_count = 0
+; GCN_C-NEXT:     reserved_vgpr_first = 0
+; GCN_C-NEXT:     reserved_vgpr_count = 0
+; GCN_C-NEXT:     reserved_sgpr_first = 0
+; GCN_C-NEXT:     reserved_sgpr_count = 0
+; GCN_C-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GCN_C-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GCN_C-NEXT:     kernarg_segment_alignment = 4
+; GCN_C-NEXT:     group_segment_alignment = 4
+; GCN_C-NEXT:     private_segment_alignment = 4
+; GCN_C-NEXT:     wavefront_size = 6
+; GCN_C-NEXT:     call_convention = -1
+; GCN_C-NEXT:     runtime_loader_kernel_symbol = 0
+; GCN_C-NEXT:    .end_amd_kernel_code_t
+; GCN_C-NEXT:  ; %bb.0:
 ;
-; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
-; GISEL:         .amd_kernel_code_t
-; GISEL-NEXT:     amd_code_version_major = 1
-; GISEL-NEXT:     amd_code_version_minor = 2
-; GISEL-NEXT:     amd_machine_kind = 1
-; GISEL-NEXT:     amd_machine_version_major = 7
-; GISEL-NEXT:     amd_machine_version_minor = 0
-; GISEL-NEXT:     amd_machine_version_stepping = 0
-; GISEL-NEXT:     kernel_code_entry_byte_offset = 256
-; GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
-; GISEL-NEXT:     granulated_workitem_vgpr_count = 10
-; GISEL-NEXT:     granulated_wavefront_sgpr_count = 8
-; GISEL-NEXT:     priority = 0
-; GISEL-NEXT:     float_mode = 240
-; GISEL-NEXT:     priv = 0
-; GISEL-NEXT:     enable_dx10_clamp = 1
-; GISEL-NEXT:     debug_mode = 0
-; GISEL-NEXT:     enable_ieee_mode = 1
-; GISEL-NEXT:     enable_wgp_mode = 0
-; GISEL-NEXT:     enable_mem_ordered = 0
-; GISEL-NEXT:     enable_fwd_progress = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GISEL-NEXT:     user_sgpr_count = 14
-; GISEL-NEXT:     enable_trap_handler = 0
-; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
-; GISEL-NEXT:     enable_vgpr_workitem_id = 2
-; GISEL-NEXT:     enable_exception_msb = 0
-; GISEL-NEXT:     granulated_lds_size = 0
-; GISEL-NEXT:     enable_exception = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
-; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
-; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
-; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
-; GISEL-NEXT:     enable_wavefront_size32 = 0
-; GISEL-NEXT:     enable_ordered_append_gds = 0
-; GISEL-NEXT:     private_element_size = 1
-; GISEL-NEXT:     is_ptr64 = 1
-; GISEL-NEXT:     is_dynamic_callstack = 1
-; GISEL-NEXT:     is_debug_enabled = 0
-; GISEL-NEXT:     is_xnack_enabled = 0
-; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
-; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
-; GISEL-NEXT:     gds_segment_byte_size = 0
-; GISEL-NEXT:     kernarg_segment_byte_size = 64
-; GISEL-NEXT:     workgroup_fbarrier_count = 0
-; GISEL-NEXT:     wavefront_sgpr_count = 68
-; GISEL-NEXT:     workitem_vgpr_count = 42
-; GISEL-NEXT:     reserved_vgpr_first = 0
-; GISEL-NEXT:     reserved_vgpr_count = 0
-; GISEL-NEXT:     reserved_sgpr_first = 0
-; GISEL-NEXT:     reserved_sgpr_count = 0
-; GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
-; GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
-; GISEL-NEXT:     kernarg_segment_alignment = 4
-; GISEL-NEXT:     group_segment_alignment = 4
-; GISEL-NEXT:     private_segment_alignment = 4
-; GISEL-NEXT:     wavefront_size = 6
-; GISEL-NEXT:     call_convention = -1
-; GISEL-NEXT:     runtime_loader_kernel_symbol = 0
-; GISEL-NEXT:    .end_amd_kernel_code_t
-; GISEL-NEXT:  ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_add_i32 s12, s12, s17
-; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GISEL-NEXT:    s_add_u32 s0, s0, s17
-; GISEL-NEXT:    s_addc_u32 s1, s1, 0
-; GISEL-NEXT:    s_mov_b32 s13, s15
-; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_getpc_b64 s[14:15]
-; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
-; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
-; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GISEL-NEXT:    s_mov_b32 s14, s16
-; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GISEL-NEXT:    s_endpgm
+; GISEL_O-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GISEL_O:         .amd_kernel_code_t
+; GISEL_O-NEXT:     amd_code_version_major = 1
+; GISEL_O-NEXT:     amd_code_version_minor = 2
+; GISEL_O-NEXT:     amd_machine_kind = 1
+; GISEL_O-NEXT:     amd_machine_version_major = 7
+; GISEL_O-NEXT:     amd_machine_version_minor = 0
+; GISEL_O-NEXT:     amd_machine_version_stepping = 0
+; GISEL_O-NEXT:     kernel_code_entry_byte_offset = 256
+; GISEL_O-NEXT:     kernel_code_prefetch_byte_size = 0
+; GISEL_O-NEXT:     granulated_workitem_vgpr_count = 10
+; GISEL_O-NEXT:     granulated_wavefront_sgpr_count = 8
+; GISEL_O-NEXT:     priority = 0
+; GISEL_O-NEXT:     float_mode = 240
+; GISEL_O-NEXT:     priv = 0
+; GISEL_O-NEXT:     enable_dx10_clamp = 1
+; GISEL_O-NEXT:     debug_mode = 0
+; GISEL_O-NEXT:     enable_ieee_mode = 1
+; GISEL_O-NEXT:     enable_wgp_mode = 0
+; GISEL_O-NEXT:     enable_mem_ordered = 0
+; GISEL_O-NEXT:     enable_fwd_progress = 0
+; GISEL_O-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GISEL_O-NEXT:     user_sgpr_count = 14
+; GISEL_O-NEXT:     enable_trap_handler = 0
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_y = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_info = 0
+; GISEL_O-NEXT:     enable_vgpr_workitem_id = 2
+; GISEL_O-NEXT:     enable_exception_msb = 0
+; GISEL_O-NEXT:     granulated_lds_size = 0
+; GISEL_O-NEXT:     enable_exception = 0
+; GISEL_O-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GISEL_O-NEXT:     enable_sgpr_dispatch_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_queue_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_dispatch_id = 1
+; GISEL_O-NEXT:     enable_sgpr_flat_scratch_init = 1
+; GISEL_O-NEXT:     enable_sgpr_private_segment_size = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GISEL_O-NEXT:     enable_wavefront_size32 = 0
+; GISEL_O-NEXT:     enable_ordered_append_gds = 0
+; GISEL_O-NEXT:     private_element_size = 1
+; GISEL_O-NEXT:     is_ptr64 = 1
+; GISEL_O-NEXT:     is_dynamic_callstack = 1
+; GISEL_O-NEXT:     is_debug_enabled = 0
+; GISEL_O-NEXT:     is_xnack_enabled = 0
+; GISEL_O-NEXT:     workitem_private_segment_byte_size = 16384
+; GISEL_O-NEXT:     workgroup_group_segment_byte_size = 0
+; GISEL_O-NEXT:     gds_segment_byte_size = 0
+; GISEL_O-NEXT:     kernarg_segment_byte_size = 64
+; GISEL_O-NEXT:     workgroup_fbarrier_count = 0
+; GISEL_O-NEXT:     wavefront_sgpr_count = 68
+; GISEL_O-NEXT:     workitem_vgpr_count = 42
+; GISEL_O-NEXT:     reserved_vgpr_first = 0
+; GISEL_O-NEXT:     reserved_vgpr_count = 0
+; GISEL_O-NEXT:     reserved_sgpr_first = 0
+; GISEL_O-NEXT:     reserved_sgpr_count = 0
+; GISEL_O-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL_O-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GISEL_O-NEXT:     kernarg_segment_alignment = 4
+; GISEL_O-NEXT:     group_segment_alignment = 4
+; GISEL_O-NEXT:     private_segment_alignment = 4
+; GISEL_O-NEXT:     wavefront_size = 6
+; GISEL_O-NEXT:     call_convention = -1
+; GISEL_O-NEXT:     runtime_loader_kernel_symbol = 0
+; GISEL_O-NEXT:    .end_amd_kernel_code_t
+; GISEL_O-NEXT:  ; %bb.0:
+; GISEL_O-NEXT:    s_mov_b32 s32, 0
+; GISEL_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL_O-NEXT:    s_add_i32 s12, s12, s17
+; GISEL_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL_O-NEXT:    s_add_u32 s0, s0, s17
+; GISEL_O-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL_O-NEXT:    s_mov_b32 s13, s15
+; GISEL_O-NEXT:    s_mov_b32 s12, s14
+; GISEL_O-NEXT:    s_getpc_b64 s[14:15]
+; GISEL_O-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
+; GISEL_O-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GISEL_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL_O-NEXT:    s_add_u32 s8, s8, 8
+; GISEL_O-NEXT:    s_addc_u32 s9, s9, 0
+; GISEL_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL_O-NEXT:    s_mov_b32 s14, s16
+; GISEL_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL_O-NEXT:    s_endpgm
+;
+; GISEL_C-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GISEL_C:         .amd_kernel_code_t
+; GISEL_C-NEXT:     amd_code_version_major = 1
+; GISEL_C-NEXT:     amd_code_version_minor = 2
+; GISEL_C-NEXT:     amd_machine_kind = 1
+; GISEL_C-NEXT:     amd_machine_version_major = 7
+; GISEL_C-NEXT:     amd_machine_version_minor = 0
+; GISEL_C-NEXT:     amd_machine_version_stepping = 0
+; GISEL_C-NEXT:     kernel_code_entry_byte_offset = 256
+; GISEL_C-NEXT:     kernel_code_prefetch_byte_size = 0
+; GISEL_C-NEXT:     granulated_workitem_vgpr_count = 0
+; GISEL_C-NEXT:     granulated_wavefront_sgpr_count = 0
+; GISEL_C-NEXT:     priority = 0
+; GISEL_C-NEXT:     float_mode = 240
+; GISEL_C-NEXT:     priv = 0
+; GISEL_C-NEXT:     enable_dx10_clamp = 1
+; GISEL_C-NEXT:     debug_mode = 0
+; GISEL_C-NEXT:     enable_ieee_mode = 1
+; GISEL_C-NEXT:     enable_wgp_mode = 0
+; GISEL_C-NEXT:     enable_mem_ordered = 0
+; GISEL_C-NEXT:     enable_fwd_progress = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; GISEL_C-NEXT:     user_sgpr_count = 6
+; GISEL_C-NEXT:     enable_trap_handler = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_z = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_info = 0
+; GISEL_C-NEXT:     enable_vgpr_workitem_id = 0
+; GISEL_C-NEXT:     enable_exception_msb = 0
+; GISEL_C-NEXT:     granulated_lds_size = 0
+; GISEL_C-NEXT:     enable_exception = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GISEL_C-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GISEL_C-NEXT:     enable_sgpr_queue_ptr = 0
+; GISEL_C-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GISEL_C-NEXT:     enable_sgpr_dispatch_id = 0
+; GISEL_C-NEXT:     enable_sgpr_flat_scratch_init = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_size = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GISEL_C-NEXT:     enable_wavefront_size32 = 0
+; GISEL_C-NEXT:     enable_ordered_append_gds = 0
+; GISEL_C-NEXT:     private_element_size = 1
+; GISEL_C-NEXT:     is_ptr64 = 1
+; GISEL_C-NEXT:     is_dynamic_callstack = 0
+; GISEL_C-NEXT:     is_debug_enabled = 0
+; GISEL_C-NEXT:     is_xnack_enabled = 0
+; GISEL_C-NEXT:     workitem_private_segment_byte_size = 0
+; GISEL_C-NEXT:     workgroup_group_segment_byte_size = 0
+; GISEL_C-NEXT:     gds_segment_byte_size = 0
+; GISEL_C-NEXT:     kernarg_segment_byte_size = 4
+; GISEL_C-NEXT:     workgroup_fbarrier_count = 0
+; GISEL_C-NEXT:     wavefront_sgpr_count = 0
+; GISEL_C-NEXT:     workitem_vgpr_count = 0
+; GISEL_C-NEXT:     reserved_vgpr_first = 0
+; GISEL_C-NEXT:     reserved_vgpr_count = 0
+; GISEL_C-NEXT:     reserved_sgpr_first = 0
+; GISEL_C-NEXT:     reserved_sgpr_count = 0
+; GISEL_C-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL_C-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GISEL_C-NEXT:     kernarg_segment_alignment = 4
+; GISEL_C-NEXT:     group_segment_alignment = 4
+; GISEL_C-NEXT:     private_segment_alignment = 4
+; GISEL_C-NEXT:     wavefront_size = 6
+; GISEL_C-NEXT:     call_convention = -1
+; GISEL_C-NEXT:     runtime_loader_kernel_symbol = 0
+; GISEL_C-NEXT:    .end_amd_kernel_code_t
+; GISEL_C-NEXT:  ; %bb.0:
   %fptr = load ptr, ptr addrspace(4) @gv.fptr1
   call void %fptr(i32 123)
   ret void
 }
 
 define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB2_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB2_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB2_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB2_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call void %fptr()
   ret void
 }
 
 define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GCN-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    v_mov_b32_e32 v0, v2
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    ; implicit-def: $vgpr2
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB3_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:    v_mov_b32_e32 v2, 0x7b
+; GCN_O-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v2
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    ; implicit-def: $vgpr2
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB3_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB3_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB3_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call void %fptr(i32 123)
   ret void
 }
 
 define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    v_mov_b32_e32 v2, v0
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB4_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    v_mov_b32_e32 v2, v0
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB4_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    v_mov_b32_e32 v1, v0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB4_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_add_i32_e32 v0, vcc, 1, v1
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    v_mov_b32_e32 v1, v0
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB4_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v1
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   %a = call i32 %fptr()
   %b = add i32 %a, 1
   ret i32 %b
 }
 
 define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
-; GCN:       ; %bb.0: ; %bb0
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 20
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT:    s_and_saveexec_b64 s[46:47], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB5_4
-; GCN-NEXT:  ; %bb.1: ; %bb1
-; GCN-NEXT:    s_mov_b64 s[48:49], exec
-; GCN-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[50:51], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[50:51]
-; GCN-NEXT:    s_cbranch_execnz .LBB5_2
-; GCN-NEXT:  ; %bb.3:
-; GCN-NEXT:    s_mov_b64 exec, s[48:49]
-; GCN-NEXT:  .LBB5_4: ; %bb2
-; GCN-NEXT:    s_or_b64 exec, exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 20
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GCN_O:       ; %bb.0: ; %bb0
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 20
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    v_and_b32_e32 v2, 1, v2
+; GCN_O-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
+; GCN_O-NEXT:    s_and_saveexec_b64 s[46:47], vcc
+; GCN_O-NEXT:    s_cbranch_execz .LBB5_4
+; GCN_O-NEXT:  ; %bb.1: ; %bb1
+; GCN_O-NEXT:    s_mov_b64 s[48:49], exec
+; GCN_O-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[50:51], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[50:51]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB5_2
+; GCN_O-NEXT:  ; %bb.3:
+; GCN_O-NEXT:    s_mov_b64 exec, s[48:49]
+; GCN_O-NEXT:  .LBB5_4: ; %bb2
+; GCN_O-NEXT:    s_or_b64 exec, exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 20
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
-; GISEL:       ; %bb.0: ; %bb0
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 20
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    v_and_b32_e32 v2, 1, v2
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    s_and_saveexec_b64 s[46:47], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB5_4
-; GISEL-NEXT:  ; %bb.1: ; %bb1
-; GISEL-NEXT:    s_mov_b64 s[48:49], exec
-; GISEL-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[50:51], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[50:51]
-; GISEL-NEXT:    s_cbranch_execnz .LBB5_2
-; GISEL-NEXT:  ; %bb.3:
-; GISEL-NEXT:    s_mov_b64 exec, s[48:49]
-; GISEL-NEXT:  .LBB5_4: ; %bb2
-; GISEL-NEXT:    s_or_b64 exec, exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 20
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GCN_C:       ; %bb.0: ; %bb0
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_C-NEXT:    v_and_b32_e32 v0, 1, v2
+; GCN_C-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN_C-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GCN_C-NEXT:  ; %bb.1: ; %bb1
+; GCN_C-NEXT:    ; divergent unreachable
+; GCN_C-NEXT:  ; %bb.2: ; %UnifiedReturnBlock
+; GCN_C-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN_C-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GISEL_O:       ; %bb.0: ; %bb0
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 20
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    v_and_b32_e32 v2, 1, v2
+; GISEL_O-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[46:47], vcc
+; GISEL_O-NEXT:    s_cbranch_execz .LBB5_4
+; GISEL_O-NEXT:  ; %bb.1: ; %bb1
+; GISEL_O-NEXT:    s_mov_b64 s[48:49], exec
+; GISEL_O-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[50:51], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[50:51]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB5_2
+; GISEL_O-NEXT:  ; %bb.3:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[48:49]
+; GISEL_O-NEXT:  .LBB5_4: ; %bb2
+; GISEL_O-NEXT:    s_or_b64 exec, exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 20
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GISEL_C:       ; %bb.0: ; %bb0
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_C-NEXT:    v_and_b32_e32 v0, 1, v2
+; GISEL_C-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL_C-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL_C-NEXT:  ; %bb.1: ; %bb1
+; GISEL_C-NEXT:    ; divergent unreachable
+; GISEL_C-NEXT:  ; %bb.2: ; %UnifiedReturnBlock
+; GISEL_C-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL_C-NEXT:    s_setpc_b64 s[30:31]
 bb0:
   br i1 %cond, label %bb1, label %bb2
 
@@ -1116,393 +1450,409 @@ bb2:
 }
 
 define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s5, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[6:7], exec
-; GCN-NEXT:    s_movk_i32 s4, 0x7b
-; GCN-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s8, v0
-; GCN-NEXT:    v_readfirstlane_b32 s9, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    s_xor_b64 exec, exec, s[10:11]
-; GCN-NEXT:    s_cbranch_execnz .LBB6_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s5
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s5, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[6:7], exec
+; GCN_O-NEXT:    s_movk_i32 s4, 0x7b
+; GCN_O-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s8, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s9, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[10:11], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[10:11]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB6_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s5
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s5, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GISEL-NEXT:    s_movk_i32 s4, 0x7b
-; GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s8, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s9, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
-; GISEL-NEXT:    s_cbranch_execnz .LBB6_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s5
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s5, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], exec
+; GISEL_O-NEXT:    s_movk_i32 s4, 0x7b
+; GISEL_O-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s8, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s9, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[10:11], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[10:11]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB6_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s5
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call amdgpu_gfx void %fptr(i32 inreg 123)
   ret void
 }
 
 define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    v_mov_b32_e32 v41, v0
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s6, v1
-; GCN-NEXT:    v_readfirstlane_b32 s7, v2
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    v_mov_b32_e32 v0, v41
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
-; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execnz .LBB7_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, v41
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    v_mov_b32_e32 v41, v0
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s6, v1
+; GCN_O-NEXT:    v_readfirstlane_b32 s7, v2
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v41
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN_O-NEXT:    ; implicit-def: $vgpr1_vgpr2
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB7_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v41
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    v_mov_b32_e32 v41, v0
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s6, v1
-; GISEL-NEXT:    v_readfirstlane_b32 s7, v2
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
-; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GISEL-NEXT:    v_mov_b32_e32 v0, v41
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GISEL-NEXT:    ; implicit-def: $vgpr1
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execnz .LBB7_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_mov_b32_e32 v0, v41
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    v_mov_b32_e32 v41, v0
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s6, v1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s7, v2
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v41
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr1
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB7_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v41
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call amdgpu_gfx void %fptr(i32 %i)
   ret i32 %i
 }
@@ -1512,391 +1862,410 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
 ; allocator is not able to do that because the return value clashes with the liverange of an
 ; IMPLICIT_DEF of the argument.
 define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s6, v1
-; GCN-NEXT:    v_readfirstlane_b32 s7, v2
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
-; GCN-NEXT:    ; implicit-def: $vgpr0
-; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execnz .LBB8_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, v3
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s6, v1
+; GCN_O-NEXT:    v_readfirstlane_b32 s7, v2
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN_O-NEXT:    v_mov_b32_e32 v3, v0
+; GCN_O-NEXT:    ; implicit-def: $vgpr1_vgpr2
+; GCN_O-NEXT:    ; implicit-def: $vgpr0
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB8_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v3
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s8, v1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s9, v2
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL_O-NEXT:    v_mov_b32_e32 v2, v0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr1
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB8_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v2
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s8, v1
-; GISEL-NEXT:    v_readfirstlane_b32 s9, v2
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GISEL-NEXT:    v_mov_b32_e32 v2, v0
-; GISEL-NEXT:    ; implicit-def: $vgpr1
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[6:7]
-; GISEL-NEXT:    s_cbranch_execnz .LBB8_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_mov_b32_e32 v0, v2
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   %ret = call amdgpu_gfx i32 %fptr(i32 %i)
   ret i32 %ret
 }
 
 ; Calling a vgpr can never be a tail call.
 define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
-; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s6, v0
-; GCN-NEXT:    v_readfirstlane_b32 s7, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execnz .LBB9_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s6, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s7, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB9_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s6, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s7, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB9_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s6, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s7, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execnz .LBB9_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   tail call amdgpu_gfx void %fptr()
   ret void
 }
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index 9b52695fefb7224..9a0bf1397028b76 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_C %s
+; RUN: FileCheck -check-prefix=REMARK %s < %t
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -attributor-assume-closed-world=false -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_O %s
 ; RUN: FileCheck -check-prefix=REMARK %s < %t
 
 ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
@@ -157,16 +159,27 @@ define void @empty_func() !dbg !8 {
   ret void
 }
 
-; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
-; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs: 39
-; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs: 32
-; STDERR-NEXT: remark: foo.cl:64:0:     AGPRs: 10
-; STDERR-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
-; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
+; STDERR_O: remark: foo.cl:64:0: Function Name: test_indirect_call
+; STDERR_O-NEXT: remark: foo.cl:64:0:     SGPRs: 39
+; STDERR_O-NEXT: remark: foo.cl:64:0:     VGPRs: 32
+; STDERR_O-NEXT: remark: foo.cl:64:0:     AGPRs: 10
+; STDERR_O-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True
+; STDERR_O-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
+; STDERR_O-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
+
+; STDERR_C:      remark: foo.cl:64:0: Function Name: test_indirect_call
+; STDERR_C-NEXT: remark: foo.cl:64:0:     SGPRs: 4
+; STDERR_C-NEXT: remark: foo.cl:64:0:     VGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     AGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     Dynamic Stack: False
+; STDERR_C-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
+; STDERR_C-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
 define amdgpu_kernel void @test_indirect_call() !dbg !9 {
@@ -175,17 +188,27 @@ define amdgpu_kernel void @test_indirect_call() !dbg !9 {
   ret void
 }
 
-; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
-; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs: 39
-; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs: 32
-; STDERR-NEXT: remark: foo.cl:74:0:     AGPRs: 10
-; STDERR-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
-; STDERR-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
-; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
+; STDERR_O: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
+; STDERR_O-NEXT: remark: foo.cl:74:0:     SGPRs: 39
+; STDERR_O-NEXT: remark: foo.cl:74:0:     VGPRs: 32
+; STDERR_O-NEXT: remark: foo.cl:74:0:     AGPRs: 10
+; STDERR_O-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
+; STDERR_O-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True
+; STDERR_O-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
+; STDERR_O-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
 
+; STDERR_C:      remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
+; STDERR_C-NEXT: remark: foo.cl:74:0:     SGPRs: 12
+; STDERR_C-NEXT: remark: foo.cl:74:0:     VGPRs: 1
+; STDERR_C-NEXT: remark: foo.cl:74:0:     AGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
+; STDERR_C-NEXT: remark: foo.cl:74:0:     Dynamic Stack: False
+; STDERR_C-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
+; STDERR_C-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg)
 
 define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 7a01679f9972c70..fa5538c40227ea2 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 target datalayout = "A5"
 
 ; FIXME: Why is this commuted only sometimes?
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index babc9c3858f901d..6ee4facf8d8425d 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features  %s | FileCheck -check-prefix=AKF_GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_CW %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_OW %s
 
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -attributor-assume-closed-world=false < %s | FileCheck -check-prefix=GFX9 %s
 
 target datalayout = "A5"
 
@@ -21,6 +22,17 @@ define internal void @indirect() {
   ret void
 }
 
+define ptr @helper() {
+; AKF_GCN-LABEL: define {{[^@]+}}@helper() {
+; AKF_GCN-NEXT:    ret ptr @indirect
+;
+; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@helper
+; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] {
+; ATTRIBUTOR_GCN-NEXT:    ret ptr @indirect
+;
+  ret ptr @indirect
+}
+
 define amdgpu_kernel void @test_simple_indirect_call() {
 ; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
 ; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
@@ -31,14 +43,23 @@ define amdgpu_kernel void @test_simple_indirect_call() {
 ; AKF_GCN-NEXT:    call void [[FP]]()
 ; AKF_GCN-NEXT:    ret void
 ;
-; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
-; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
-; ATTRIBUTOR_GCN-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT:    call void [[FP]]()
-; ATTRIBUTOR_GCN-NEXT:    ret void
+; ATTRIBUTOR_GCN_CW-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_GCN_CW-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
+; ATTRIBUTOR_GCN_CW-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_CW-NEXT:    call void @indirect()
+; ATTRIBUTOR_GCN_CW-NEXT:    ret void
+;
+; ATTRIBUTOR_GCN_OW-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_GCN_OW-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
+; ATTRIBUTOR_GCN_OW-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_OW-NEXT:    call void [[FP]]()
+; ATTRIBUTOR_GCN_OW-NEXT:    ret void
 ;
 ; GFX9-LABEL: test_simple_indirect_call:
 ; GFX9:       ; %bb.0:
@@ -73,6 +94,9 @@ define amdgpu_kernel void @test_simple_indirect_call() {
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_CW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+;.
+; ATTRIBUTOR_GCN_OW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.



More information about the llvm-commits mailing list