[llvm] pr/amdgpu closed world (PR #66488)

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 21 17:15:09 PST 2023


https://github.com/jdoerfert updated https://github.com/llvm/llvm-project/pull/66488

>From a986064b519658be2069b8c4fc160f3071c4ea6d Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Fri, 15 Sep 2023 03:33:46 -0700
Subject: [PATCH 1/2] [Attributor][NFC] Add NumCallees argument to callback

---
 llvm/include/llvm/Transforms/IPO/Attributor.h    | 7 ++++---
 llvm/lib/Transforms/IPO/Attributor.cpp           | 2 +-
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 30c51250af61ca..8fa5cca716af38 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1717,10 +1717,11 @@ struct Attributor {
   /// Return true if we should specialize the call site \b CB for the potential
   /// callee \p Fn.
   bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA,
-                                         CallBase &CB, Function &Callee) {
+                                         CallBase &CB, Function &Callee,
+                                         unsigned NumCallees) {
     return Configuration.IndirectCalleeSpecializationCallback
-               ? Configuration.IndirectCalleeSpecializationCallback(*this, AA,
-                                                                    CB, Callee)
+               ? Configuration.IndirectCalleeSpecializationCallback(
+                     *this, AA, CB, Callee, NumCallees)
                : true;
   }
 
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index d8e290cbc8a4d0..db2d0c269f2e6a 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3831,7 +3831,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
   if (MaxSpecializationPerCB.getNumOccurrences()) {
     AC.IndirectCalleeSpecializationCallback =
         [&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
-            Function &Callee) {
+            Function &Callee, unsigned NumCallees) {
           if (MaxSpecializationPerCB == 0)
             return false;
           auto &Set = IndirectCalleeTrackingMap[&CB];
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 8e1f782f7cd811..b620dc0f4162ad 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12386,7 +12386,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
     SmallVector<Function *, 8> SkippedAssumedCallees;
     SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
     for (Function *NewCallee : AssumedCallees) {
-      if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
+      if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee,
+                                               AssumedCallees.size())) {
         SkippedAssumedCallees.push_back(NewCallee);
         SpecializedForAllCallees = false;
         continue;

>From afbfd50aace7585ffcffa72cb82a6a2477adb325 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Wed, 23 Aug 2023 17:28:51 -0700
Subject: [PATCH 2/2] [Attributor][AMDGPU] Improve indirect call support in
 closed modules

If we see all functions that can be called, i.e., we are in a "closed
world", we can reason more precisely about indirect calls whose callees
are unknown. We now collect all indirectly callable functions and limit
the potentially called functions to those.

The AMDGPU backend is the only user for now. We should also enable this
for AMDGPU (and, in certain cases, for NVIDIA GPUs) when we run the
Attributor (or OpenMP-opt) earlier in the pipeline.
---
 llvm/include/llvm/CodeGen/TargetPassConfig.h  |   10 +
 llvm/include/llvm/Target/TargetMachine.h      |   13 +-
 llvm/include/llvm/Transforms/IPO/Attributor.h |    2 +-
 llvm/lib/CodeGen/LLVMTargetMachine.cpp        |   15 +-
 llvm/lib/LTO/LTOBackend.cpp                   |    5 +-
 llvm/lib/Target/AMDGPU/AMDGPU.h               |    9 +-
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   |   45 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |    8 +-
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h  |    3 +
 .../GlobalISel/irtranslator-indirect-call.ll  |   36 +-
 .../annotate-kernel-features-hsa-call.ll      |  175 +-
 .../AMDGPU/attributor-loop-issue-58639.ll     |   65 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll    |   44 +-
 .../AMDGPU/duplicate-attribute-indirect.ll    |   32 +-
 .../enable-scratch-only-dynamic-stack.ll      |   26 +-
 llvm/test/CodeGen/AMDGPU/indirect-call.ll     | 3194 +++++++++--------
 .../AMDGPU/resource-optimization-remarks.ll   |   65 +-
 llvm/test/CodeGen/AMDGPU/sibling-call.ll      |    6 +-
 .../CodeGen/AMDGPU/simple-indirect-call.ll    |   48 +-
 llvm/tools/llc/llc.cpp                        |    8 +-
 20 files changed, 2077 insertions(+), 1732 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 66365419aa330b..a489a19606b305 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -139,6 +139,9 @@ class TargetPassConfig : public ImmutablePass {
   /// callers.
   bool RequireCodeGenSCCOrder = false;
 
+  /// True if we can assume whole program visibility during codegen.
+  bool HasWholeProgramVisibility = false;
+
   /// Add the actual instruction selection passes. This does not include
   /// preparation passes on IR.
   bool addCoreISelPasses();
@@ -189,6 +192,13 @@ class TargetPassConfig : public ImmutablePass {
     setOpt(RequireCodeGenSCCOrder, Enable);
   }
 
+  bool getHasWholeProgramVisibility() const {
+    return HasWholeProgramVisibility;
+  }
+  void setHasWholeProgramVisibility(bool Enable) {
+    setOpt(HasWholeProgramVisibility, Enable);
+  }
+
   /// Allow the target to override a specific pass without overriding the pass
   /// pipeline. When passes are added to the standard pipeline at the
   /// point where StandardID is expected, add TargetID in its place.
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 4c29f25bedf419..b28aa52d55f359 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -378,7 +378,8 @@ class TargetMachine {
   addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &,
                       raw_pwrite_stream *, CodeGenFileType,
                       bool /*DisableVerify*/ = true,
-                      MachineModuleInfoWrapperPass *MMIWP = nullptr) {
+                      MachineModuleInfoWrapperPass *MMIWP = nullptr,
+                      bool HasWholeProgramVisibility = false) {
     return true;
   }
 
@@ -444,11 +445,11 @@ class LLVMTargetMachine : public TargetMachine {
   /// emitted.  Typically this will involve several steps of code generation.
   /// \p MMIWP is an optional parameter that, if set to non-nullptr,
   /// will be used to set the MachineModuloInfo for this PM.
-  bool
-  addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
-                      raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
-                      bool DisableVerify = true,
-                      MachineModuleInfoWrapperPass *MMIWP = nullptr) override;
+  bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
+                           raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+                           bool DisableVerify = true,
+                           MachineModuleInfoWrapperPass *MMIWP = nullptr,
+                           bool HasWholeProgramVisibility = false) override;
 
   virtual Error buildCodeGenPipeline(ModulePassManager &,
                                      MachineFunctionPassManager &,
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 8fa5cca716af38..77c2fe022a093c 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1447,7 +1447,7 @@ struct AttributorConfig {
   /// Callback function to determine if an indirect call targets should be made
   /// direct call targets (with an if-cascade).
   std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB,
-                     Function &AssummedCallee)>
+                     Function &AssummedCallee, unsigned NumCallees)>
       IndirectCalleeSpecializationCallback = nullptr;
 
   /// Helper to update an underlying call graph and to delete functions.
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 42cabb58e5189d..8ae7b764ab7dc4 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -114,13 +114,14 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) const {
 /// addPassesToX helper drives creation and initialization of TargetPassConfig.
 static TargetPassConfig *
 addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM,
-                        bool DisableVerify,
-                        MachineModuleInfoWrapperPass &MMIWP) {
+                        bool DisableVerify, MachineModuleInfoWrapperPass &MMIWP,
+                        bool HasWholeProgramVisibility) {
   // Targets may override createPassConfig to provide a target-specific
   // subclass.
   TargetPassConfig *PassConfig = TM.createPassConfig(PM);
   // Set PassConfig options provided by TargetMachine.
   PassConfig->setDisableVerify(DisableVerify);
+  PassConfig->setHasWholeProgramVisibility(HasWholeProgramVisibility);
   PM.add(PassConfig);
   PM.add(&MMIWP);
 
@@ -233,12 +234,12 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
 bool LLVMTargetMachine::addPassesToEmitFile(
     PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
     CodeGenFileType FileType, bool DisableVerify,
-    MachineModuleInfoWrapperPass *MMIWP) {
+    MachineModuleInfoWrapperPass *MMIWP, bool HasWholeProgramVisibility) {
   // Add common CodeGen passes.
   if (!MMIWP)
     MMIWP = new MachineModuleInfoWrapperPass(this);
-  TargetPassConfig *PassConfig =
-      addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP);
+  TargetPassConfig *PassConfig = addPassesToGenerateCode(
+      *this, PM, DisableVerify, *MMIWP, HasWholeProgramVisibility);
   if (!PassConfig)
     return true;
 
@@ -265,8 +266,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
                                           bool DisableVerify) {
   // Add common CodeGen passes.
   MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(this);
-  TargetPassConfig *PassConfig =
-      addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP);
+  TargetPassConfig *PassConfig = addPassesToGenerateCode(
+      *this, PM, DisableVerify, *MMIWP, /*HasWholeProgramVisibility=*/false);
   if (!PassConfig)
     return true;
   assert(TargetPassConfig::willCompleteCodeGenPipeline() &&
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index ccc4276e36dacf..aa6542756ea9a0 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -416,8 +416,9 @@ static void codegen(const Config &Conf, TargetMachine *TM,
   if (Conf.PreCodeGenPassesHook)
     Conf.PreCodeGenPassesHook(CodeGenPasses);
   if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
-                              DwoOut ? &DwoOut->os() : nullptr,
-                              Conf.CGFileType))
+                              DwoOut ? &DwoOut->os() : nullptr, Conf.CGFileType,
+                              /*DisableVerify=*/true, /*MMIWP=*/nullptr,
+                              Conf.HasWholeProgramVisibility))
     report_fatal_error("Failed to setup codegen");
   CodeGenPasses.run(Mod);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 35d33cb60bc47c..574830d6871a75 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -88,7 +88,7 @@ extern char &AMDGPUMachineCFGStructurizerID;
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
 
 Pass *createAMDGPUAnnotateKernelFeaturesPass();
-Pass *createAMDGPUAttributorLegacyPass();
+Pass *createAMDGPUAttributorLegacyPass(bool HasWholeProgramVisibility = false);
 void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
 extern char &AMDGPUAnnotateKernelFeaturesID;
@@ -271,8 +271,13 @@ class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
 private:
   TargetMachine &TM;
 
+  /// True if we can assume whole program visibility during codegen.
+  bool HasWholeProgramVisibility = false;
+
 public:
-  AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
+  AMDGPUAttributorPass(TargetMachine &TM,
+                       bool HasWholeProgramVisibility = false)
+      : TM(TM), HasWholeProgramVisibility(HasWholeProgramVisibility){};
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 5fd9e571282dbc..640ea603882dc1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,11 +14,15 @@
 #include "GCNSubtarget.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO/Attributor.h"
+#include <optional>
 
 #define DEBUG_TYPE "amdgpu-attributor"
 
@@ -933,7 +937,8 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
   }
 }
 
-static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
+static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
+                    bool HasWholeProgramVisibility) {
   SetVector<Function *> Functions;
   for (Function &F : M) {
     if (!F.isIntrinsic())
@@ -947,12 +952,31 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
       {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
        &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
        &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
-       &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
+       &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
+       &AAIndirectCallInfo::ID});
+
+  /// Helper to decide if we should specialize the indirect \p CB for \p Callee,
+  /// which is one of the \p NumCallees potential callees.
+  auto IndirectCalleeSpecializationCallback =
+      [&](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
+          Function &Callee, unsigned NumCallees) {
+        if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv()))
+          return false;
+        // Singleton functions should be specialized.
+        if (NumCallees == 1)
+          return true;
+        // Otherwise, specialize only if the called operand is always uniform.
+        const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller());
+        return TTI.isAlwaysUniform(CB.getCalledOperand());
+      };
 
   AttributorConfig AC(CGUpdater);
   AC.Allowed = &Allowed;
   AC.IsModulePass = true;
   AC.DefaultInitializeLiveInternals = false;
+  AC.IsClosedWorldModule = HasWholeProgramVisibility;
+  AC.IndirectCalleeSpecializationCallback =
+      IndirectCalleeSpecializationCallback;
   AC.IPOAmendableCB = [](const Function &F) {
     return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
   };
@@ -978,8 +1002,12 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
 }
 
 class AMDGPUAttributorLegacy : public ModulePass {
+  /// True if we can assume whole program visibility during codegen.
+  bool HasWholeProgramVisibility = false;
+
 public:
-  AMDGPUAttributorLegacy() : ModulePass(ID) {}
+  AMDGPUAttributorLegacy(bool HasWholeProgramVisibility = false)
+      : ModulePass(ID), HasWholeProgramVisibility(HasWholeProgramVisibility) {}
 
   /// doInitialization - Virtual method overridden by subclasses to do
   /// any necessary initialization before any pass is run.
@@ -994,7 +1022,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
 
   bool runOnModule(Module &M) override {
     AnalysisGetter AG(this);
-    return runImpl(M, AG, *TM);
+    return runImpl(M, AG, *TM, HasWholeProgramVisibility);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1015,14 +1043,15 @@ PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
   AnalysisGetter AG(FAM);
 
   // TODO: Probably preserves CFG
-  return runImpl(M, AG, TM) ? PreservedAnalyses::none()
-                            : PreservedAnalyses::all();
+  return runImpl(M, AG, TM, HasWholeProgramVisibility)
+             ? PreservedAnalyses::none()
+             : PreservedAnalyses::all();
 }
 
 char AMDGPUAttributorLegacy::ID = 0;
 
-Pass *llvm::createAMDGPUAttributorLegacyPass() {
-  return new AMDGPUAttributorLegacy();
+Pass *llvm::createAMDGPUAttributorLegacyPass(bool HasWholeProgramVisibility) {
+  return new AMDGPUAttributorLegacy(HasWholeProgramVisibility);
 }
 INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
                       false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index fdc2077868cf99..43108ab744169e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -50,6 +50,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -625,7 +626,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
       [this](StringRef PassName, ModulePassManager &PM,
              ArrayRef<PassBuilder::PipelineElement>) {
         if (PassName == "amdgpu-attributor") {
-          PM.addPass(AMDGPUAttributorPass(*this));
+          PM.addPass(AMDGPUAttributorPass(*this, HasWholeProgramVisibility));
           return true;
         }
         if (PassName == "amdgpu-unify-metadata") {
@@ -1004,7 +1005,8 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
 }
 
 void AMDGPUPassConfig::addIRPasses() {
-  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
+  AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
+  TM.HasWholeProgramVisibility = getHasWholeProgramVisibility();
 
   Triple::ArchType Arch = TM.getTargetTriple().getArch();
   if (RemoveIncompatibleFunctions && Arch == Triple::amdgcn)
@@ -1041,7 +1043,7 @@ void AMDGPUPassConfig::addIRPasses() {
   // AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run
   // after their introduction
   if (TM.getOptLevel() > CodeGenOptLevel::None)
-    addPass(createAMDGPUAttributorLegacyPass());
+    addPass(createAMDGPUAttributorLegacyPass(HasWholeProgramVisibility));
 
   if (TM.getOptLevel() > CodeGenOptLevel::None)
     addPass(createInferAddressSpacesPass());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 9051a61e65570c..abb25eb5d69065 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -38,6 +38,9 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
   static bool EnableFunctionCalls;
   static bool EnableLowerModuleLDS;
 
+  /// True if we can assume whole program visibility during codegen.
+  bool HasWholeProgramVisibility = false;
+
   AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
                       StringRef FS, TargetOptions Options,
                       std::optional<Reloc::Model> RM,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index 4eba84f61c2d8a..0bb9a58c29ae71 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -attributor-assume-closed-world=false -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CHECK %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CWRLD %s
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
   ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
@@ -52,24 +53,31 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
   ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; CHECK-NEXT:   S_ENDPGM 0
+  ;
+  ; CWRLD-LABEL: name: test_indirect_call_sgpr_ptr
+  ; CWRLD: bb.1 (%ir-block.0):
+  ; CWRLD-NEXT:   liveins: $sgpr4_sgpr5
+  ; CWRLD-NEXT: {{  $}}
+  ; CWRLD-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; CWRLD-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   call void %fptr()
   ret void
 }
 
 define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) {
-  ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr
-  ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   SI_RETURN
+  ; SAMEC-LABEL: name: test_gfx_indirect_call_sgpr_ptr
+  ; SAMEC: bb.1 (%ir-block.0):
+  ; SAMEC-NEXT:   liveins: $vgpr0, $vgpr1
+  ; SAMEC-NEXT: {{  $}}
+  ; SAMEC-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; SAMEC-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; SAMEC-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; SAMEC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; SAMEC-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; SAMEC-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+  ; SAMEC-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; SAMEC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; SAMEC-NEXT:   SI_RETURN
   call amdgpu_gfx void %fptr()
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 1396dab69c13a1..b62bdb9cc2b509 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_HSA %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,OWRLD_ATTR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor -attributor-assume-closed-world < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,CWRLD_ATTR_HSA %s
 
 ; TODO: The test contains UB which is refined by the Attributor and should be removed.
 
@@ -18,6 +19,16 @@ declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
 declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
 declare i64 @llvm.amdgcn.dispatch.id() #0
 
+@G1 = global ptr poison
+@G2 = global ptr poison
+
+;.
+; AKF_HSA: @G1 = global ptr poison
+; AKF_HSA: @G2 = global ptr poison
+;.
+; ATTRIBUTOR_HSA: @G1 = global ptr poison
+; ATTRIBUTOR_HSA: @G2 = global ptr poison
+;.
 define void @use_workitem_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
 ; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] {
@@ -766,19 +777,55 @@ define float @func_indirect_call(ptr %fptr) #3 {
 ; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] {
 ; AKF_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
 ; AKF_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; AKF_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; AKF_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
-; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
-; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
-; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
-; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
-; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
+; OWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
+; OWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
+; OWRLD_ATTR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
+; OWRLD_ATTR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; OWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; OWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
+; OWRLD_ATTR_HSA-NEXT:    ret float [[FADD]]
+;
+; CWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
+; CWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] {
+; CWRLD_ATTR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]](), !callees [[META1:![0-9]+]]
+; CWRLD_ATTR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; CWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; CWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
+; CWRLD_ATTR_HSA-NEXT:    ret float [[FADD]]
 ;
   %f = call float %fptr()
   %fadd = fadd float %f, 1.0
+  store ptr @indirect_callee1, ptr @G1
+  store ptr @indirect_callee2, ptr @G2
   ret float %fadd
 }
 
+define float @indirect_callee1() {
+; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee1() {
+; AKF_HSA-NEXT:    ret float 0x40091EB860000000
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee1
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-NEXT:    ret float 0x40091EB860000000
+;
+  ret float 0x40091EB860000000
+}
+define float @indirect_callee2(float noundef %arg) {
+; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee2
+; AKF_HSA-SAME: (float noundef [[ARG:%.*]]) {
+; AKF_HSA-NEXT:    ret float [[ARG]]
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee2
+; ATTRIBUTOR_HSA-SAME: (float noundef [[ARG:%.*]]) #[[ATTR19]] {
+; ATTRIBUTOR_HSA-NEXT:    ret float [[ARG]]
+;
+  ret float %arg
+}
+
 declare float @extern() #3
 define float @func_extern_call() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call
@@ -845,7 +892,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -861,7 +908,7 @@ define void @func_sanitize_address() #4 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -877,7 +924,7 @@ define void @func_indirect_sanitize_address() #3 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -893,7 +940,7 @@ define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -928,7 +975,7 @@ define internal void @enqueue_block_def() #6 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -941,7 +988,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @enqueue_block_decl()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -956,7 +1003,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @enqueue_block_def()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -969,7 +1016,7 @@ define void @unused_enqueue_block() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -980,7 +1027,7 @@ define internal void @known_func() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -994,7 +1041,7 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @known_func() #[[ATTR29:[0-9]+]]
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -1024,38 +1071,72 @@ attributes #6 = { "enqueued-block" }
 ; AKF_HSA: attributes #[[ATTR7]] = { "enqueued-block" }
 ; AKF_HSA: attributes #[[ATTR8]] = { "amdgpu-calls" }
 ;.
-; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR28]] = { nounwind }
-; ATTRIBUTOR_HSA: attributes #[[ATTR29]] = { "enqueued-block" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; OWRLD_ATTR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR19]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR21]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR23]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR24:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR25:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR26]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR27]] = { "uniform-work-group-size"="false" }
+; OWRLD_ATTR_HSA: attributes #[[ATTR28]] = { nounwind }
+; OWRLD_ATTR_HSA: attributes #[[ATTR29]] = { "enqueued-block" }
+;.
+; CWRLD_ATTR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CWRLD_ATTR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR19]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR21]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR23]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR24:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR25:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR26]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR27]] = { "uniform-work-group-size"="false" }
+; CWRLD_ATTR_HSA: attributes #[[ATTR28]] = { nounwind }
+; CWRLD_ATTR_HSA: attributes #[[ATTR29]] = { "enqueued-block" }
 ;.
 ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500}
 ;.
-; ATTRIBUTOR_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500}
+; OWRLD_ATTR_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500}
+;.
+; CWRLD_ATTR_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500}
+; CWRLD_ATTR_HSA: [[META1]] = !{ptr @indirect_callee1, ptr @indirect_callee2}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
index 2b9f579e6a1839..c9c651ab4a84a0 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,OWRLD
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world %s | FileCheck %s --check-prefixes=CHECK,CWRLD
 
 %0 = type { ptr, ptr }
 
@@ -20,19 +21,32 @@ bb:
 }
 
 define internal fastcc double @baz(ptr %arg) {
-; CHECK-LABEL: define {{[^@]+}}@baz
-; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call double [[TMP1]]()
-; CHECK-NEXT:    br label [[BB3:%.*]]
-; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
-; CHECK-NEXT:    br label [[BB5:%.*]]
-; CHECK:       bb5:
-; CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
-; CHECK-NEXT:    br label [[BB5]]
+; OWRLD-LABEL: define {{[^@]+}}@baz
+; OWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
+; OWRLD-NEXT:  bb:
+; OWRLD-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
+; OWRLD-NEXT:    [[TMP2:%.*]] = tail call double [[TMP1]]()
+; OWRLD-NEXT:    br label [[BB3:%.*]]
+; OWRLD:       bb3:
+; OWRLD-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
+; OWRLD-NEXT:    br label [[BB5:%.*]]
+; OWRLD:       bb5:
+; OWRLD-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+; OWRLD-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
+; OWRLD-NEXT:    br label [[BB5]]
+;
+; CWRLD-LABEL: define {{[^@]+}}@baz
+; CWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
+; CWRLD-NEXT:  bb:
+; CWRLD-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
+; CWRLD-NEXT:    unreachable
+; CWRLD:       bb3:
+; CWRLD-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
+; CWRLD-NEXT:    br label [[BB5:%.*]]
+; CWRLD:       bb5:
+; CWRLD-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CWRLD-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
+; CWRLD-NEXT:    br label [[BB5]]
 ;
 bb:
   %tmp1 = load ptr, ptr %arg, align 8
@@ -49,13 +63,19 @@ bb5:                                              ; preds = %bb5, %bb3
   br label %bb5
 }
 
-define amdgpu_kernel void @entry() {
-; CHECK-LABEL: define {{[^@]+}}@entry
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
-; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
-; CHECK-NEXT:    ret void
+define amdgpu_kernel void @entry() { ; OWRLD-LABEL: define {{[^@]+}}@entry
+; OWRLD-SAME: () #[[ATTR0]] {
+; OWRLD-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
+; OWRLD-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; OWRLD-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@entry
+; CWRLD-SAME: () #[[ATTR1:[0-9]+]] {
+; CWRLD-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
+; CWRLD-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CWRLD-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
+; CWRLD-NEXT:    ret void
 ;
   %alloca = alloca %0, align 8, addrspace(5)
   %cast = addrspacecast ptr addrspace(5) %alloca to ptr
@@ -63,5 +83,6 @@ define amdgpu_kernel void @entry() {
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index 0c034192869b18..c0693442984870 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,OWRLD
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world %s | FileCheck %s --check-prefixes=CHECK,CWRLD
 
 define internal void @indirect() {
 ; CHECK-LABEL: define {{[^@]+}}@indirect
@@ -10,13 +11,21 @@ define internal void @indirect() {
 }
 
 define internal void @direct() {
-; CHECK-LABEL: define {{[^@]+}}@direct
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; CHECK-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
-; CHECK-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; CHECK-NEXT:    call void [[FP]]()
-; CHECK-NEXT:    ret void
+; OWRLD-LABEL: define {{[^@]+}}@direct
+; OWRLD-SAME: () #[[ATTR1:[0-9]+]] {
+; OWRLD-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; OWRLD-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; OWRLD-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; OWRLD-NEXT:    call void [[FP]]()
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@direct
+; CWRLD-SAME: () #[[ATTR0]] {
+; CWRLD-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; CWRLD-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; CWRLD-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; CWRLD-NEXT:    call void @indirect()
+; CWRLD-NEXT:    ret void
 ;
   %fptr = alloca ptr, addrspace(5)
   store ptr @indirect, ptr addrspace(5) %fptr
@@ -26,15 +35,22 @@ define internal void @direct() {
 }
 
 define amdgpu_kernel void @test_direct_indirect_call() {
-; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    call void @direct()
-; CHECK-NEXT:    ret void
+; OWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call
+; OWRLD-SAME: () #[[ATTR1]] {
+; OWRLD-NEXT:    call void @direct()
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call
+; CWRLD-SAME: () #[[ATTR0]] {
+; CWRLD-NEXT:    call void @direct()
+; CWRLD-NEXT:    ret void
 ;
   call void @direct()
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.
+; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index 0069370cc9721c..93b00ad572229c 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features  %s | FileCheck -check-prefix=AKF_GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_OWR
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_CWR
 
 define internal void @indirect() {
 ; AKF_GCN-LABEL: define {{[^@]+}}@indirect() {
@@ -22,13 +23,21 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
 ; AKF_GCN-NEXT:    call void [[FP]]()
 ; AKF_GCN-NEXT:    ret void
 ;
-; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
-; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT:    call void [[FP]]()
-; ATTRIBUTOR_GCN-NEXT:    ret void
+; ATTRIBUTOR_OWR-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_OWR-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_OWR-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_OWR-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_OWR-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_OWR-NEXT:    call void [[FP]]()
+; ATTRIBUTOR_OWR-NEXT:    ret void
+;
+; ATTRIBUTOR_CWR-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_CWR-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_CWR-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_CWR-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_CWR-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_CWR-NEXT:    call void @indirect()
+; ATTRIBUTOR_CWR-NEXT:    ret void
 ;
   %fptr = alloca ptr, addrspace(5)
   store ptr @indirect, ptr addrspace(5) %fptr
@@ -42,6 +51,9 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_OWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_OWR: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
+;.
+; ATTRIBUTOR_CWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CWR: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
index 22f90682aa9738..ca73d33edfc864 100644
--- a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
+++ b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
@@ -1,18 +1,26 @@
-; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV5C %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV4C %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV5O %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV4O %s
 
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
-; No stack objects, only indirect call has to enable scrathch
-; GCN-LABEL: test_indirect_call:
+; No stack objects, only indirect call has to enable scratch
+; GCNO-LABEL: test_indirect_call:
+; GCNC-LABEL: test_indirect_call:
 
-; COV5: .amdhsa_private_segment_fixed_size 0{{$}}
-; COV4: .amdhsa_private_segment_fixed_size 16384{{$}}
+; COV5O: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV5C: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV4C: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV4O: .amdhsa_private_segment_fixed_size 16384{{$}}
 
-; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
+; GCNO: .amdhsa_user_sgpr_private_segment_buffer 1
+; GCNC: .amdhsa_user_sgpr_private_segment_buffer 1
 
-; COV5: .amdhsa_uses_dynamic_stack 1
-; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; COV5O: .amdhsa_uses_dynamic_stack 1
+; COV5C: .amdhsa_uses_dynamic_stack 0
+; GCNO: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; GCNC: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
 define amdgpu_kernel void @test_indirect_call() {
   %fptr = load ptr, ptr addrspace(4) @gv.fptr0
   call void %fptr()
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index 3aaf04c94cda58..24f4798eabb88e 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -1,837 +1,899 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_O %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_C %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_O %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_C %s
 
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
-; GCN-LABEL: test_indirect_call_sgpr_ptr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GCN-NEXT:    s_add_i32 s12, s12, s17
-; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GCN-NEXT:    s_add_u32 s0, s0, s17
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_mov_b32 s13, s15
-; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_getpc_b64 s[14:15]
-; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GCN-NEXT:    s_add_u32 s8, s8, 8
-; GCN-NEXT:    s_addc_u32 s9, s9, 0
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
-; GCN-NEXT:    s_mov_b32 s14, s16
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GCN-NEXT:    s_endpgm
+; GCN_O-LABEL: test_indirect_call_sgpr_ptr:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_mov_b32 s32, 0
+; GCN_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GCN_O-NEXT:    s_add_i32 s12, s12, s17
+; GCN_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN_O-NEXT:    s_add_u32 s0, s0, s17
+; GCN_O-NEXT:    s_addc_u32 s1, s1, 0
+; GCN_O-NEXT:    s_mov_b32 s13, s15
+; GCN_O-NEXT:    s_mov_b32 s12, s14
+; GCN_O-NEXT:    s_getpc_b64 s[14:15]
+; GCN_O-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
+; GCN_O-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GCN_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GCN_O-NEXT:    s_add_u32 s8, s8, 8
+; GCN_O-NEXT:    s_addc_u32 s9, s9, 0
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GCN_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GCN_O-NEXT:    s_mov_b32 s14, s16
+; GCN_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN_O-NEXT:    s_endpgm
 ;
-; GISEL-LABEL: test_indirect_call_sgpr_ptr:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_add_i32 s12, s12, s17
-; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GISEL-NEXT:    s_add_u32 s0, s0, s17
-; GISEL-NEXT:    s_addc_u32 s1, s1, 0
-; GISEL-NEXT:    s_mov_b32 s13, s15
-; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_getpc_b64 s[14:15]
-; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
-; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
-; GISEL-NEXT:    s_mov_b32 s14, s16
-; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GISEL-NEXT:    s_endpgm
+; GCN_C-LABEL: test_indirect_call_sgpr_ptr:
+; GCN_C:       ; %bb.0:
+;
+; GISEL_O-LABEL: test_indirect_call_sgpr_ptr:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_mov_b32 s32, 0
+; GISEL_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL_O-NEXT:    s_add_i32 s12, s12, s17
+; GISEL_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL_O-NEXT:    s_add_u32 s0, s0, s17
+; GISEL_O-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL_O-NEXT:    s_mov_b32 s13, s15
+; GISEL_O-NEXT:    s_mov_b32 s12, s14
+; GISEL_O-NEXT:    s_getpc_b64 s[14:15]
+; GISEL_O-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
+; GISEL_O-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL_O-NEXT:    s_add_u32 s8, s8, 8
+; GISEL_O-NEXT:    s_addc_u32 s9, s9, 0
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
+; GISEL_O-NEXT:    v_or_b32_e32 v31, v0, v1
+; GISEL_O-NEXT:    s_mov_b32 s14, s16
+; GISEL_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL_O-NEXT:    s_endpgm
+;
+; GISEL_C-LABEL: test_indirect_call_sgpr_ptr:
+; GISEL_C:       ; %bb.0:
   %fptr = load ptr, ptr addrspace(4) @gv.fptr0
   call void %fptr()
   ret void
 }
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
-; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GCN-NEXT:    s_add_i32 s12, s12, s17
-; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GCN-NEXT:    s_add_u32 s0, s0, s17
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_mov_b32 s13, s15
-; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_getpc_b64 s[14:15]
-; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GCN-NEXT:    s_add_u32 s8, s8, 8
-; GCN-NEXT:    s_addc_u32 s9, s9, 0
-; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
-; GCN-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GCN-NEXT:    s_mov_b32 s14, s16
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GCN-NEXT:    s_endpgm
+; GCN_O-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_mov_b32 s32, 0
+; GCN_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GCN_O-NEXT:    s_add_i32 s12, s12, s17
+; GCN_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN_O-NEXT:    s_add_u32 s0, s0, s17
+; GCN_O-NEXT:    s_addc_u32 s1, s1, 0
+; GCN_O-NEXT:    s_mov_b32 s13, s15
+; GCN_O-NEXT:    s_mov_b32 s12, s14
+; GCN_O-NEXT:    s_getpc_b64 s[14:15]
+; GCN_O-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
+; GCN_O-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GCN_O-NEXT:    s_add_u32 s8, s8, 8
+; GCN_O-NEXT:    s_addc_u32 s9, s9, 0
+; GCN_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GCN_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GCN_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GCN_O-NEXT:    s_mov_b32 s14, s16
+; GCN_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN_O-NEXT:    s_endpgm
+;
+; GCN_C-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GCN_C:       ; %bb.0:
 ;
-; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_add_i32 s12, s12, s17
-; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GISEL-NEXT:    s_add_u32 s0, s0, s17
-; GISEL-NEXT:    s_addc_u32 s1, s1, 0
-; GISEL-NEXT:    s_mov_b32 s13, s15
-; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_getpc_b64 s[14:15]
-; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
-; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
-; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GISEL-NEXT:    s_mov_b32 s14, s16
-; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GISEL-NEXT:    s_endpgm
+; GISEL_O-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_mov_b32 s32, 0
+; GISEL_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL_O-NEXT:    s_add_i32 s12, s12, s17
+; GISEL_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL_O-NEXT:    s_add_u32 s0, s0, s17
+; GISEL_O-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL_O-NEXT:    s_mov_b32 s13, s15
+; GISEL_O-NEXT:    s_mov_b32 s12, s14
+; GISEL_O-NEXT:    s_getpc_b64 s[14:15]
+; GISEL_O-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
+; GISEL_O-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GISEL_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL_O-NEXT:    s_add_u32 s8, s8, 8
+; GISEL_O-NEXT:    s_addc_u32 s9, s9, 0
+; GISEL_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL_O-NEXT:    s_mov_b32 s14, s16
+; GISEL_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL_O-NEXT:    s_endpgm
+;
+; GISEL_C-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GISEL_C:       ; %bb.0:
   %fptr = load ptr, ptr addrspace(4) @gv.fptr1
   call void %fptr(i32 123)
   ret void
 }
 
 define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB2_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB2_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB2_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB2_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call void %fptr()
   ret void
 }
 
 define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GCN-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    v_mov_b32_e32 v0, v2
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    ; implicit-def: $vgpr2
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB3_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:    v_mov_b32_e32 v2, 0x7b
+; GCN_O-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v2
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    ; implicit-def: $vgpr2
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB3_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB3_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB3_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call void %fptr(i32 123)
   ret void
 }
 
 define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    v_mov_b32_e32 v2, v0
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB4_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    v_mov_b32_e32 v2, v0
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB4_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    v_mov_b32_e32 v1, v0
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB4_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v1
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    v_mov_b32_e32 v1, v0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB4_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_add_i32_e32 v0, vcc, 1, v1
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   %a = call i32 %fptr()
   %b = add i32 %a, 1
   ret i32 %b
 }
 
 define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
-; GCN:       ; %bb.0: ; %bb0
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 20
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT:    s_and_saveexec_b64 s[46:47], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB5_4
-; GCN-NEXT:  ; %bb.1: ; %bb1
-; GCN-NEXT:    s_mov_b64 s[48:49], exec
-; GCN-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[50:51], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[50:51]
-; GCN-NEXT:    s_cbranch_execnz .LBB5_2
-; GCN-NEXT:  ; %bb.3:
-; GCN-NEXT:    s_mov_b64 exec, s[48:49]
-; GCN-NEXT:  .LBB5_4: ; %bb2
-; GCN-NEXT:    s_or_b64 exec, exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 20
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GCN_O:       ; %bb.0: ; %bb0
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 20
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    v_and_b32_e32 v2, 1, v2
+; GCN_O-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
+; GCN_O-NEXT:    s_and_saveexec_b64 s[46:47], vcc
+; GCN_O-NEXT:    s_cbranch_execz .LBB5_4
+; GCN_O-NEXT:  ; %bb.1: ; %bb1
+; GCN_O-NEXT:    s_mov_b64 s[48:49], exec
+; GCN_O-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[50:51], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[50:51]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB5_2
+; GCN_O-NEXT:  ; %bb.3:
+; GCN_O-NEXT:    s_mov_b64 exec, s[48:49]
+; GCN_O-NEXT:  .LBB5_4: ; %bb2
+; GCN_O-NEXT:    s_or_b64 exec, exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 20
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GCN_C:       ; %bb.0: ; %bb0
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_C-NEXT:    v_and_b32_e32 v0, 1, v2
+; GCN_C-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN_C-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GCN_C-NEXT:  ; %bb.1: ; %bb1
+; GCN_C-NEXT:    ; divergent unreachable
+; GCN_C-NEXT:  ; %bb.2: ; %UnifiedReturnBlock
+; GCN_C-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN_C-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
-; GISEL:       ; %bb.0: ; %bb0
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 20
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    v_and_b32_e32 v2, 1, v2
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    s_and_saveexec_b64 s[46:47], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB5_4
-; GISEL-NEXT:  ; %bb.1: ; %bb1
-; GISEL-NEXT:    s_mov_b64 s[48:49], exec
-; GISEL-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[50:51], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[50:51]
-; GISEL-NEXT:    s_cbranch_execnz .LBB5_2
-; GISEL-NEXT:  ; %bb.3:
-; GISEL-NEXT:    s_mov_b64 exec, s[48:49]
-; GISEL-NEXT:  .LBB5_4: ; %bb2
-; GISEL-NEXT:    s_or_b64 exec, exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 20
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GISEL_O:       ; %bb.0: ; %bb0
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 20
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    v_and_b32_e32 v2, 1, v2
+; GISEL_O-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[46:47], vcc
+; GISEL_O-NEXT:    s_cbranch_execz .LBB5_4
+; GISEL_O-NEXT:  ; %bb.1: ; %bb1
+; GISEL_O-NEXT:    s_mov_b64 s[48:49], exec
+; GISEL_O-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[50:51], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[50:51]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB5_2
+; GISEL_O-NEXT:  ; %bb.3:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[48:49]
+; GISEL_O-NEXT:  .LBB5_4: ; %bb2
+; GISEL_O-NEXT:    s_or_b64 exec, exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 20
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GISEL_C:       ; %bb.0: ; %bb0
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_C-NEXT:    v_and_b32_e32 v0, 1, v2
+; GISEL_C-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL_C-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL_C-NEXT:  ; %bb.1: ; %bb1
+; GISEL_C-NEXT:    ; divergent unreachable
+; GISEL_C-NEXT:  ; %bb.2: ; %UnifiedReturnBlock
+; GISEL_C-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL_C-NEXT:    s_setpc_b64 s[30:31]
 bb0:
   br i1 %cond, label %bb1, label %bb2
 
@@ -844,393 +906,409 @@ bb2:
 }
 
 define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s5, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[6:7], exec
-; GCN-NEXT:    s_movk_i32 s4, 0x7b
-; GCN-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s8, v0
-; GCN-NEXT:    v_readfirstlane_b32 s9, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    s_xor_b64 exec, exec, s[10:11]
-; GCN-NEXT:    s_cbranch_execnz .LBB6_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s5
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s5, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[6:7], exec
+; GCN_O-NEXT:    s_movk_i32 s4, 0x7b
+; GCN_O-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s8, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s9, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[10:11], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[10:11]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB6_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s5
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s5, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], exec
+; GISEL_O-NEXT:    s_movk_i32 s4, 0x7b
+; GISEL_O-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s8, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s9, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[10:11], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[10:11]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB6_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s5
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s5, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GISEL-NEXT:    s_movk_i32 s4, 0x7b
-; GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s8, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s9, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
-; GISEL-NEXT:    s_cbranch_execnz .LBB6_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s5
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call amdgpu_gfx void %fptr(i32 inreg 123)
   ret void
 }
 
 define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    v_mov_b32_e32 v41, v0
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s6, v1
-; GCN-NEXT:    v_readfirstlane_b32 s7, v2
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    v_mov_b32_e32 v0, v41
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
-; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execnz .LBB7_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, v41
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    v_mov_b32_e32 v41, v0
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s6, v1
+; GCN_O-NEXT:    v_readfirstlane_b32 s7, v2
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v41
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN_O-NEXT:    ; implicit-def: $vgpr1_vgpr2
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB7_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v41
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    v_mov_b32_e32 v41, v0
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s6, v1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s7, v2
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v41
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr1
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB7_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v41
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    v_mov_b32_e32 v41, v0
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s6, v1
-; GISEL-NEXT:    v_readfirstlane_b32 s7, v2
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
-; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GISEL-NEXT:    v_mov_b32_e32 v0, v41
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GISEL-NEXT:    ; implicit-def: $vgpr1
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execnz .LBB7_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_mov_b32_e32 v0, v41
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call amdgpu_gfx void %fptr(i32 %i)
   ret i32 %i
 }
@@ -1240,391 +1318,411 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
 ; allocator is not able to do that because the return value clashes with the liverange of an
 ; IMPLICIT_DEF of the argument.
 define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s8, v1
-; GCN-NEXT:    v_readfirstlane_b32 s9, v2
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
-; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
-; GCN-NEXT:    ; implicit-def: $vgpr0
-; GCN-NEXT:    s_xor_b64 exec, exec, s[6:7]
-; GCN-NEXT:    s_cbranch_execnz .LBB8_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, v3
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s8, v1
-; GISEL-NEXT:    v_readfirstlane_b32 s9, v2
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GISEL-NEXT:    v_mov_b32_e32 v2, v0
-; GISEL-NEXT:    ; implicit-def: $vgpr1
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[6:7]
-; GISEL-NEXT:    s_cbranch_execnz .LBB8_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_mov_b32_e32 v0, v2
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s8, v1
+; GCN_O-NEXT:    v_readfirstlane_b32 s9, v2
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GCN_O-NEXT:    v_mov_b32_e32 v3, v0
+; GCN_O-NEXT:    ; implicit-def: $vgpr1_vgpr2
+; GCN_O-NEXT:    ; implicit-def: $vgpr0
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB8_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v3
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s8, v1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s9, v2
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL_O-NEXT:    v_mov_b32_e32 v2, v0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr1
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB8_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v2
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   %ret = call amdgpu_gfx i32 %fptr(i32 %i)
   ret i32 %ret
 }
 
 ; Calling a vgpr can never be a tail call.
 define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
-; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s6, v0
-; GCN-NEXT:    v_readfirstlane_b32 s7, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execnz .LBB9_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s6, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s7, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB9_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s6, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s7, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB9_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s6, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s7, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execnz .LBB9_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   tail call amdgpu_gfx void %fptr()
   ret void
 }
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index 9b52695fefb722..9a0bf1397028b7 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_C %s
+; RUN: FileCheck -check-prefix=REMARK %s < %t
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -attributor-assume-closed-world=false -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_O %s
 ; RUN: FileCheck -check-prefix=REMARK %s < %t
 
 ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
@@ -157,16 +159,27 @@ define void @empty_func() !dbg !8 {
   ret void
 }
 
-; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
-; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs: 39
-; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs: 32
-; STDERR-NEXT: remark: foo.cl:64:0:     AGPRs: 10
-; STDERR-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
-; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
+; STDERR_O: remark: foo.cl:64:0: Function Name: test_indirect_call
+; STDERR_O-NEXT: remark: foo.cl:64:0:     SGPRs: 39
+; STDERR_O-NEXT: remark: foo.cl:64:0:     VGPRs: 32
+; STDERR_O-NEXT: remark: foo.cl:64:0:     AGPRs: 10
+; STDERR_O-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True
+; STDERR_O-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
+; STDERR_O-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
+
+; STDERR_C:      remark: foo.cl:64:0: Function Name: test_indirect_call
+; STDERR_C-NEXT: remark: foo.cl:64:0:     SGPRs: 4
+; STDERR_C-NEXT: remark: foo.cl:64:0:     VGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     AGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     Dynamic Stack: False
+; STDERR_C-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
+; STDERR_C-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
 define amdgpu_kernel void @test_indirect_call() !dbg !9 {
@@ -175,17 +188,27 @@ define amdgpu_kernel void @test_indirect_call() !dbg !9 {
   ret void
 }
 
-; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
-; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs: 39
-; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs: 32
-; STDERR-NEXT: remark: foo.cl:74:0:     AGPRs: 10
-; STDERR-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
-; STDERR-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
-; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
+; STDERR_O: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
+; STDERR_O-NEXT: remark: foo.cl:74:0:     SGPRs: 39
+; STDERR_O-NEXT: remark: foo.cl:74:0:     VGPRs: 32
+; STDERR_O-NEXT: remark: foo.cl:74:0:     AGPRs: 10
+; STDERR_O-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
+; STDERR_O-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True
+; STDERR_O-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
+; STDERR_O-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
 
+; STDERR_C:      remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
+; STDERR_C-NEXT: remark: foo.cl:74:0:     SGPRs: 12
+; STDERR_C-NEXT: remark: foo.cl:74:0:     VGPRs: 1
+; STDERR_C-NEXT: remark: foo.cl:74:0:     AGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
+; STDERR_C-NEXT: remark: foo.cl:74:0:     Dynamic Stack: False
+; STDERR_C-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
+; STDERR_C-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg)
 
 define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 7a01679f9972c7..fa5538c40227ea 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 target datalayout = "A5"
 
 ; FIXME: Why is this commuted only sometimes?
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index dcc90c0dcd407e..a37e18d06a149a 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features  %s | FileCheck -check-prefix=AKF_GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -attributor-assume-closed-world | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_CW %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_OW %s
 
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -attributor-assume-closed-world=false < %s | FileCheck -check-prefix=GFX9 %s
 
 target datalayout = "A5"
 
@@ -21,6 +22,17 @@ define internal void @indirect() {
   ret void
 }
 
+define ptr @helper() {
+; AKF_GCN-LABEL: define {{[^@]+}}@helper() {
+; AKF_GCN-NEXT:    ret ptr @indirect
+;
+; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@helper
+; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] {
+; ATTRIBUTOR_GCN-NEXT:    ret ptr @indirect
+;
+  ret ptr @indirect
+}
+
 define amdgpu_kernel void @test_simple_indirect_call() {
 ; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
 ; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
@@ -31,14 +43,23 @@ define amdgpu_kernel void @test_simple_indirect_call() {
 ; AKF_GCN-NEXT:    call void [[FP]]()
 ; AKF_GCN-NEXT:    ret void
 ;
-; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
-; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
-; ATTRIBUTOR_GCN-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT:    call void [[FP]]()
-; ATTRIBUTOR_GCN-NEXT:    ret void
+; ATTRIBUTOR_GCN_CW-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_GCN_CW-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
+; ATTRIBUTOR_GCN_CW-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_CW-NEXT:    call void @indirect()
+; ATTRIBUTOR_GCN_CW-NEXT:    ret void
+;
+; ATTRIBUTOR_GCN_OW-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_GCN_OW-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
+; ATTRIBUTOR_GCN_OW-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_OW-NEXT:    call void [[FP]]()
+; ATTRIBUTOR_GCN_OW-NEXT:    ret void
 ;
 ; GFX9-LABEL: test_simple_indirect_call:
 ; GFX9:       ; %bb.0:
@@ -73,6 +94,9 @@ define amdgpu_kernel void @test_simple_indirect_call() {
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_CW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+;.
+; ATTRIBUTOR_GCN_OW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 4a1957588a2243..70663d124e2a8d 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -54,6 +54,7 @@
 #include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/SubtargetFeature.h"
 #include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include <memory>
 #include <optional>
@@ -717,9 +718,10 @@ static int compileModule(char **argv, LLVMContext &Context) {
       TPC.setInitialized();
       PM.add(createPrintMIRPass(*OS));
       PM.add(createFreeMachineFunctionPass());
-    } else if (Target->addPassesToEmitFile(
-                   PM, *OS, DwoOut ? &DwoOut->os() : nullptr,
-                   codegen::getFileType(), NoVerify, MMIWP)) {
+    } else if (Target->addPassesToEmitFile(PM, *OS,
+                                           DwoOut ? &DwoOut->os() : nullptr,
+                                           codegen::getFileType(), NoVerify,
+                                           MMIWP, TheTriple.isAMDGPU())) {
       reportError("target does not support generation of this file type");
     }
 



More information about the llvm-commits mailing list