[llvm] 06b9017 - AMDGPU: Remove fixed function ABI option
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 10 16:41:24 PST 2021
Author: Matt Arsenault
Date: 2021-12-10T19:41:19-05:00
New Revision: 06b90175e7e3ff2c9298ad3a15a00c1f04ae7029
URL: https://github.com/llvm/llvm-project/commit/06b90175e7e3ff2c9298ad3a15a00c1f04ae7029
DIFF: https://github.com/llvm/llvm-project/commit/06b90175e7e3ff2c9298ad3a15a00c1f04ae7029.diff
LOG: AMDGPU: Remove fixed function ABI option
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
llvm/test/CodeGen/AMDGPU/call-constant.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
llvm/test/CodeGen/AMDGPU/indirect-call.ll
llvm/test/CodeGen/AMDGPU/returnaddress.ll
Removed:
llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index aab76d27ef11..d28f38e42430 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -173,14 +173,7 @@ constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
const AMDGPUFunctionArgInfo &
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
auto I = ArgInfoMap.find(&F);
- if (I == ArgInfoMap.end()) {
- if (AMDGPUTargetMachine::EnableFixedFunctionABI)
- return FixedABIFunctionInfo;
-
- // Without the fixed ABI, we assume no function has special inputs.
- assert(F.isDeclaration());
- return ExternFunctionInfo;
- }
-
+ if (I == ArgInfoMap.end())
+ return FixedABIFunctionInfo;
return I->second;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 3c4a84ca46d7..d122d01aefe2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -717,8 +717,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
if (!IsEntryFunc) {
// For the fixed ABI, pass workitem IDs in the last argument register.
- if (AMDGPUTargetMachine::EnableFixedFunctionABI)
- TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
+ TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
IncomingValueAssigner Assigner(AssignFn);
@@ -731,11 +730,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
uint64_t StackOffset = Assigner.StackOffset;
- if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
- // Special inputs come after user arguments.
- TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
- }
-
// Start adding system SGPRs.
if (IsEntryFunc) {
TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
@@ -1236,8 +1230,7 @@ bool AMDGPUCallLowering::lowerTailCall(
// after the ordinary user argument registers.
SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
- if (AMDGPUTargetMachine::EnableFixedFunctionABI &&
- Info.CallConv != CallingConv::AMDGPU_Gfx) {
+ if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
return false;
@@ -1303,12 +1296,6 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
const DataLayout &DL = F.getParent()->getDataLayout();
- if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
- Info.CallConv != CallingConv::AMDGPU_Gfx) {
- LLVM_DEBUG(dbgs() << "Variable function ABI not implemented\n");
- return false;
- }
-
SmallVector<ArgInfo, 8> OutArgs;
for (auto &OrigArg : Info.OrigArgs)
splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
@@ -1362,8 +1349,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// after the ordinary user argument registers.
SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
- if (AMDGPUTargetMachine::EnableFixedFunctionABI &&
- Info.CallConv != CallingConv::AMDGPU_Gfx) {
+ if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 530b75fb63c8..a2c61f9da8da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -231,13 +231,6 @@ static cl::opt<bool, true> LateCFGStructurize(
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
cl::Hidden);
-static cl::opt<bool, true> EnableAMDGPUFixedFunctionABIOpt(
- "amdgpu-fixed-function-abi",
- cl::desc("Enable all implicit function arguments"),
- cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI),
- cl::init(true),
- cl::Hidden);
-
// Enable lib calls simplifications
static cl::opt<bool> EnableLibCallSimplify(
"amdgpu-simplify-libcall",
@@ -505,7 +498,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
-bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 0ff2db2a52d9..226646a96953 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -37,7 +37,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
public:
static bool EnableLateStructurizeCFG;
static bool EnableFunctionCalls;
- static bool EnableFixedFunctionABI;
static bool EnableLowerModuleLDS;
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 90370362f9e0..3827d05074bf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2063,33 +2063,30 @@ void SITargetLowering::allocateSpecialInputSGPRs(
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
- // We need to allocate these in place regardless of their use.
- const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI;
-
// TODO: Unify handling with private memory pointers.
- if (IsFixed || Info.hasDispatchPtr())
+ if (Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
- if (IsFixed || Info.hasQueuePtr())
+ if (Info.hasQueuePtr())
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.
- if (IsFixed || Info.hasImplicitArgPtr())
+ if (Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
- if (IsFixed || Info.hasDispatchID())
+ if (Info.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
- if (IsFixed || Info.hasWorkGroupIDX())
+ if (Info.hasWorkGroupIDX())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
- if (IsFixed || Info.hasWorkGroupIDY())
+ if (Info.hasWorkGroupIDY())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
- if (IsFixed || Info.hasWorkGroupIDZ())
+ if (Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
}
@@ -2422,8 +2419,7 @@ SDValue SITargetLowering::LowerFormalArguments(
allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
} else {
// For the fixed ABI, pass workitem IDs in the last argument register.
- if (AMDGPUTargetMachine::EnableFixedFunctionABI)
- allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
+ allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
if (IsKernel) {
@@ -2550,11 +2546,6 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
- if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
- // Special inputs come after user arguments.
- allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
- }
-
// Start adding system SGPRs.
if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
@@ -3124,8 +3115,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
- if (AMDGPUTargetMachine::EnableFixedFunctionABI &&
- CallConv != CallingConv::AMDGPU_Gfx) {
+ if (CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
@@ -3264,12 +3254,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
- if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
- CallConv != CallingConv::AMDGPU_Gfx) {
- // Copy special input registers after user input arguments.
- passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
- }
-
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 062d5ff4a036..6f92ac6dac45 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -62,11 +62,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// calls.
const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
- // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
- // have any calls.
- const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
- CC != CallingConv::AMDGPU_Gfx &&
- (!isEntryFunction() || HasCalls);
const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
CC == CallingConv::SPIR_KERNEL;
@@ -80,7 +75,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
if (!isEntryFunction()) {
- if (UseFixedABI)
+ if (CC != CallingConv::AMDGPU_Gfx)
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
// TODO: Pick a high register, and shift down, similar to a kernel.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
index a01b5b80585f..9ed3ec23a181 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
@@ -33,8 +33,8 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) {
; CHECK-NEXT: bb.2.atomicrmw.start:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %16(s64), %bb.2, [[C1]](s64), %bb.1
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %9(s64), %bb.2, [[C1]](s64), %bb.1
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2
; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]]
; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 6dc2148a5d31..2e647da9b5df 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
; Test that we don't insert code to pass implicit arguments we know
; the callee does not need.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index be5396ecdcd1..00f2e4b620c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s
; Workitem IDs are passed to the kernel
differently for gfx908
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index a727c76b975a..17d8f497cbcd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i1 @external_i1_func_void() #0
declare zeroext i1 @external_i1_zeroext_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index 6ca94d551fbd..c5b4b95df52e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 0071ca9275e7..d81a40bfe6d0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
declare hidden void @external_void_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
index b9bc22eaa3e2..10218b9a88d0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
@@ -41,8 +41,8 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %10:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $vgpr0 = COPY %10(s32)
+ ; CHECK-NEXT: %3:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
@@ -57,8 +57,8 @@ define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $vgpr0 = COPY %10(s32)
+ ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
@@ -115,8 +115,8 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %14:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %14(<2 x s32>)
+ ; CHECK-NEXT: %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -155,8 +155,8 @@ define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $vgpr0 = COPY %10(s32)
+ ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
@@ -171,8 +171,8 @@ define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $vgpr0 = COPY %10(s32)
+ ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
@@ -187,8 +187,8 @@ define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $vgpr0 = COPY %10(s32)
+ ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
@@ -203,8 +203,8 @@ define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $vgpr0 = COPY %10(s32)
+ ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
@@ -220,8 +220,8 @@ define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, flo
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %11:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; CHECK-NEXT: $vgpr0 = COPY %11(s32)
+ ; CHECK-NEXT: %4:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY %4(s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore")
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
index 515db98c940d..3b07fa4828e4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -amdgpu-fixed-function-abi=1 -stop-after=irtranslator -o - %s | FileCheck --check-prefix=FIXED %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -amdgpu-fixed-function-abi=0 -stop-after=irtranslator -o - %s | FileCheck --check-prefix=VARABI %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=irtranslator -o - %s | FileCheck --check-prefix=FIXED %s
; Make sure arg1 is not allocated in v31, which is reserved for
; workitem IDs with -amdgpu-fixed-function-abi.
@@ -47,46 +46,6 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) {
; FIXED: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; FIXED: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
; FIXED: S_SETPC_B64_return [[COPY32]]
- ; VARABI-LABEL: name: void_a31i32_i32
- ; VARABI: bb.1 (%ir-block.0):
- ; VARABI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31
- ; VARABI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; VARABI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; VARABI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; VARABI: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; VARABI: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; VARABI: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; VARABI: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; VARABI: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; VARABI: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; VARABI: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; VARABI: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; VARABI: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; VARABI: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; VARABI: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; VARABI: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; VARABI: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; VARABI: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; VARABI: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; VARABI: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; VARABI: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; VARABI: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; VARABI: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; VARABI: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; VARABI: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; VARABI: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; VARABI: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; VARABI: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; VARABI: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; VARABI: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; VARABI: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; VARABI: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
- ; VARABI: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
- ; VARABI: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; VARABI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; VARABI: G_STORE [[COPY31]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; VARABI: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
- ; VARABI: S_SETPC_B64_return [[COPY33]]
store i32 %arg1, i32 addrspace(1)* undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index e635c5b3649a..cc2c95324c2e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) {
; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
index 09ecc55222ff..d31e73e91b60 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -54,9 +54,9 @@ define i32 @asm_vgpr_early_clobber() {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %9, !0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %2, !0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
@@ -90,8 +90,8 @@ define i32 @test_single_vgpr_output() nounwind {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
@@ -106,8 +106,8 @@ define i32 @test_single_sgpr_output_s32() nounwind {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
@@ -123,9 +123,9 @@ define float @test_multiple_register_outputs_same() #0 {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8, 1835018 /* regdef:VGPR_32 */, def %9
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835018 /* regdef:VGPR_32 */, def %2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY1]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
@@ -144,9 +144,9 @@ define double @test_multiple_register_outputs_mixed() #0 {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8, 2949130 /* regdef:VReg_64 */, def %9
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -217,8 +217,8 @@ define float @test_input_vgpr(i32 %src) nounwind {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
- ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %9, 1835017 /* reguse:VGPR_32 */, [[COPY2]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 1835017 /* reguse:VGPR_32 */, [[COPY2]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %2
; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
@@ -234,8 +234,8 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %9, 196622 /* mem:m */, [[COPY]](p3)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 196622 /* mem:m */, [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
@@ -253,8 +253,8 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
- ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %11
+ ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %4
; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
@@ -269,14 +269,14 @@ define i32 @test_sgpr_matching_constraint() nounwind {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %10
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32)
- ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %12, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY %12
+ ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY %5
; CHECK-NEXT: $vgpr0 = COPY [[COPY5]](s32)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0
@@ -300,10 +300,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11, 1835018 /* regdef:VGPR_32 */, def %12, 1835018 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5)
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %11
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %12
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY %13
+ ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY %6
; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -325,11 +325,11 @@ define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %10
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %3
; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 340ad256cb14..d6da4fca2198 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; This is a copy of sibling-call.ll, but stops after the IRTranslator.
define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
index 055ac7779ace..ac1d289e1227 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
declare hidden void @external_void_func_void()
diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
index 329c96a1fb3b..11084620176d 100644
--- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=0 < %s | FileCheck -check-prefix=VARABI %s
-; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s
-; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s
+; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s
+; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s
; Test with gfx803 so that
; addrspacecast/llvm.amdgcn.is.shared/llvm.amdgcn.is.private require
@@ -15,31 +14,6 @@ declare hidden void @requires_all_inputs()
; does not require the implicit arguments to the function. Make sure
; we do not crash.
define void @parent_func_missing_inputs() #0 {
-; VARABI-LABEL: parent_func_missing_inputs:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: s_or_saveexec_b64 s[4:5], -1
-; VARABI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; VARABI-NEXT: s_mov_b64 exec, s[4:5]
-; VARABI-NEXT: v_writelane_b32 v40, s33, 2
-; VARABI-NEXT: s_mov_b32 s33, s32
-; VARABI-NEXT: s_addk_i32 s32, 0x400
-; VARABI-NEXT: v_writelane_b32 v40, s30, 0
-; VARABI-NEXT: v_writelane_b32 v40, s31, 1
-; VARABI-NEXT: s_getpc_b64 s[4:5]
-; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
-; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
-; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; VARABI-NEXT: v_readlane_b32 s4, v40, 0
-; VARABI-NEXT: v_readlane_b32 s5, v40, 1
-; VARABI-NEXT: s_addk_i32 s32, 0xfc00
-; VARABI-NEXT: v_readlane_b32 s33, v40, 2
-; VARABI-NEXT: s_or_saveexec_b64 s[6:7], -1
-; VARABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; VARABI-NEXT: s_mov_b64 exec, s[6:7]
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: s_setpc_b64 s[4:5]
-;
; FIXEDABI-LABEL: parent_func_missing_inputs:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -69,20 +43,6 @@ define void @parent_func_missing_inputs() #0 {
}
define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
-; VARABI-LABEL: parent_kernel_missing_inputs:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_add_i32 s4, s4, s9
-; VARABI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
-; VARABI-NEXT: s_add_u32 s0, s0, s9
-; VARABI-NEXT: s_addc_u32 s1, s1, 0
-; VARABI-NEXT: s_mov_b32 s32, 0
-; VARABI-NEXT: s_mov_b32 flat_scratch_lo, s5
-; VARABI-NEXT: s_getpc_b64 s[4:5]
-; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
-; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
-; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; VARABI-NEXT: s_endpgm
-;
; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_add_i32 s4, s4, s9
@@ -132,20 +92,6 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
; Function is marked with amdgpu-no-workitem-id-* but uses them anyway
define void @marked_func_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
-; VARABI-LABEL: marked_func_use_workitem_id:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: v_and_b32_e32 v3, 0x3ff, v2
-; VARABI-NEXT: flat_store_dword v[0:1], v3
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: v_bfe_u32 v3, v2, 10, 10
-; VARABI-NEXT: v_bfe_u32 v2, v2, 20, 10
-; VARABI-NEXT: flat_store_dword v[0:1], v3
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: flat_store_dword v[0:1], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: s_setpc_b64 s[30:31]
-;
; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -184,20 +130,6 @@ define void @marked_func_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
; Function is marked with amdgpu-no-workitem-id-* but uses them anyway
define amdgpu_kernel void @marked_kernel_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
-; VARABI-LABEL: marked_kernel_use_workitem_id:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VARABI-NEXT: s_waitcnt lgkmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v4, s1
-; VARABI-NEXT: v_mov_b32_e32 v3, s0
-; VARABI-NEXT: flat_store_dword v[3:4], v0
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: flat_store_dword v[3:4], v1
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: flat_store_dword v[3:4], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: s_endpgm
-;
; FIXEDABI-LABEL: marked_kernel_use_workitem_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -221,20 +153,6 @@ define amdgpu_kernel void @marked_kernel_use_workitem_id(i32 addrspace(1)* %ptr)
}
define void @marked_func_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
-; VARABI-LABEL: marked_func_use_workgroup_id:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v2, s4
-; VARABI-NEXT: flat_store_dword v[0:1], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v2, s5
-; VARABI-NEXT: flat_store_dword v[0:1], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v2, s6
-; VARABI-NEXT: flat_store_dword v[0:1], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: s_setpc_b64 s[30:31]
-;
; FIXEDABI-LABEL: marked_func_use_workgroup_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -258,23 +176,6 @@ define void @marked_func_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
}
define amdgpu_kernel void @marked_kernel_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
-; VARABI-LABEL: marked_kernel_use_workgroup_id:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VARABI-NEXT: v_mov_b32_e32 v2, s6
-; VARABI-NEXT: s_waitcnt lgkmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v0, s0
-; VARABI-NEXT: v_mov_b32_e32 v1, s1
-; VARABI-NEXT: flat_store_dword v[0:1], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v2, s7
-; VARABI-NEXT: flat_store_dword v[0:1], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v2, s8
-; VARABI-NEXT: flat_store_dword v[0:1], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: s_endpgm
-;
; FIXEDABI-LABEL: marked_kernel_use_workgroup_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -301,17 +202,6 @@ define amdgpu_kernel void @marked_kernel_use_workgroup_id(i32 addrspace(1)* %ptr
}
define void @marked_func_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
-; VARABI-LABEL: marked_func_use_other_sgpr:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: s_setpc_b64 s[30:31]
-;
; FIXEDABI-LABEL: marked_func_use_other_sgpr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -344,19 +234,6 @@ define void @marked_func_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
}
define amdgpu_kernel void @marked_kernel_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
-; VARABI-LABEL: marked_kernel_use_other_sgpr:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_add_u32 s0, s4, 8
-; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
-; VARABI-NEXT: s_addc_u32 s1, s5, 0
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v0, s0
-; VARABI-NEXT: v_mov_b32_e32 v1, s1
-; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
-; VARABI-NEXT: s_endpgm
-;
; FIXEDABI-LABEL: marked_kernel_use_other_sgpr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_add_u32 s0, s4, 8
@@ -381,13 +258,6 @@ define amdgpu_kernel void @marked_kernel_use_other_sgpr(i64 addrspace(1)* %ptr)
}
define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
-; VARABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: v_mov_b32_e32 v0, 0
-; VARABI-NEXT: v_mov_b32_e32 v1, 0
-; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
-; VARABI-NEXT: s_endpgm
-;
; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: v_mov_b32_e32 v0, 0
@@ -401,23 +271,6 @@ define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
; On gfx8, the queue ptr is required for this addrspacecast.
define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) #0 {
-; VARABI-LABEL: addrspacecast_requires_queue_ptr:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
-; VARABI-NEXT: v_mov_b32_e32 v3, 0
-; VARABI-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
-; VARABI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
-; VARABI-NEXT: v_mov_b32_e32 v4, 1
-; VARABI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VARABI-NEXT: v_mov_b32_e32 v1, v3
-; VARABI-NEXT: flat_store_dword v[2:3], v4
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: v_mov_b32_e32 v2, 2
-; VARABI-NEXT: flat_store_dword v[0:1], v2
-; VARABI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; VARABI-NEXT: s_setpc_b64 s[30:31]
-;
; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -469,14 +322,6 @@ define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i3
}
define void @is_shared_requires_queue_ptr(i8* %ptr) #0 {
-; VARABI-LABEL: is_shared_requires_queue_ptr:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; VARABI-NEXT: flat_store_dword v[0:1], v0
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: s_setpc_b64 s[30:31]
-;
; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -494,14 +339,6 @@ define void @is_shared_requires_queue_ptr(i8* %ptr) #0 {
}
define void @is_private_requires_queue_ptr(i8* %ptr) #0 {
-; VARABI-LABEL: is_private_requires_queue_ptr:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; VARABI-NEXT: flat_store_dword v[0:1], v0
-; VARABI-NEXT: s_waitcnt vmcnt(0)
-; VARABI-NEXT: s_setpc_b64 s[30:31]
-;
; FIXEDABI-LABEL: is_private_requires_queue_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -519,12 +356,6 @@ define void @is_private_requires_queue_ptr(i8* %ptr) #0 {
}
define void @trap_requires_queue() #0 {
-; VARABI-LABEL: trap_requires_queue:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: s_mov_b64 s[0:1], 0
-; VARABI-NEXT: s_trap 2
-;
; FIXEDABI-LABEL: trap_requires_queue:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -535,11 +366,6 @@ define void @trap_requires_queue() #0 {
}
define void @debugtrap_requires_queue() #0 {
-; VARABI-LABEL: debugtrap_requires_queue:
-; VARABI: ; %bb.0:
-; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VARABI-NEXT: s_trap 3
-;
; FIXEDABI-LABEL: debugtrap_requires_queue:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index f37a34c753ec..19ba7792f5b7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -2,9 +2,6 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
-; Make sure this interacts well with -amdgpu-fixed-function-abi
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
-
declare float @extern_func(float) #0
declare float @extern_func_many_args(<64 x float>) #0
diff --git a/llvm/test/CodeGen/AMDGPU/call-constant.ll b/llvm/test/CodeGen/AMDGPU/call-constant.ll
index e4d2094348eb..1baa9fe259f0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-constant.ll
@@ -1,5 +1,5 @@
-; RUN: llc -global-isel=0 -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,SDAG %s
-; RUN: llc -global-isel=1 -amdgpu-fixed-function-abi=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,GISEL %s
; GCN-LABEL: {{^}}test_call_undef:
; GCN: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index 391b4c93794b..351aabf25738 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -1,5 +1,5 @@
-; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
@@ -10,6 +10,16 @@ define hidden void @use_dispatch_ptr() #1 {
ret void
}
+; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
+; GCN-NOT: s[4:5]
+; GCN-NOT: s4
+; GCN-NOT: s5
+; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
+define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
+ call void @use_dispatch_ptr()
+ ret void
+}
+
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
define hidden void @use_queue_ptr() #1 {
@@ -19,6 +29,39 @@ define hidden void @use_queue_ptr() #1 {
ret void
}
+; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
+; GCN: s_mov_b64 s[6:7], s[4:5]
+; GCN: .amdhsa_user_sgpr_queue_ptr 1
+define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
+ call void @use_queue_ptr()
+ ret void
+}
+
+; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
+; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[6:7], 0x10
+; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
+; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
+; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
+; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
+define hidden void @use_queue_ptr_addrspacecast() #1 {
+ %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
+ store volatile i32 0, i32* %asc
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
+; CIVI: s_mov_b64 s[6:7], s[4:5]
+; CIVI: .amdhsa_user_sgpr_queue_ptr 1
+
+; GFX9-NOT: s_mov_b64 s[6:7]
+; GFX9: .amdhsa_user_sgpr_queue_ptr 0
+define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
+ call void @use_queue_ptr_addrspacecast()
+ ret void
+}
+
+; Not really supported in callable functions.
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
@@ -38,6 +81,13 @@ define hidden void @use_implicitarg_ptr() #1 {
ret void
}
+; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
+; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
+ call void @use_kernarg_segment_ptr()
+ ret void
+}
+
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[10:11]
define hidden void @use_dispatch_id() #1 {
@@ -45,6 +95,18 @@ define hidden void @use_dispatch_id() #1 {
call void asm sideeffect "; use $0", "s"(i64 %id)
ret void
}
+
+; No kernarg segment so that there is a mov to check. With kernarg
+; pointer enabled, it happens to end up in the right place anyway.
+
+; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
+; GCN: s_mov_b64 s[10:11], s[4:5]
+; GCN: .amdhsa_user_sgpr_dispatch_id 1
+define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
+ call void @use_dispatch_id()
+ ret void
+}
+
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s12
@@ -133,6 +195,123 @@ define hidden void @use_workgroup_id_yz() #1 {
ret void
}
+; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
+; GCN-NOT: s6
+; GCN: s_mov_b32 s12, s6
+; GCN: s_mov_b32 s32, 0
+; GCN: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workgroup_id_x@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workgroup_id_x@rel32@hi+12
+; GCN: s_swappc_b64
+; GCN-NEXT: s_endpgm
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
+ call void @use_workgroup_id_x()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
+; GCN-NOT: s12
+; GCN: s_mov_b32 s13, s7
+; GCN-NOT: s12
+; GCN: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
+ call void @use_workgroup_id_y()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
+; GCN-NOT: s12
+; GCN-NOT: s13
+; GCN: s_mov_b32 s14, s7
+; GCN-NOT: s12
+; GCN-NOT: s13
+
+; GCN: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
+ call void @use_workgroup_id_z()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
+; GCN-NOT: s14
+; GCN: s_mov_b32 s12, s6
+; GCN-NEXT: s_mov_b32 s13, s7
+; GCN-NOT: s14
+
+; GCN: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
+ call void @use_workgroup_id_xy()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
+; GCN: s_mov_b32 s12, s6
+; GCN: s_mov_b32 s13, s7
+; GCN: s_mov_b32 s14, s8
+; GCN: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
+ call void @use_workgroup_id_xyz()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
+
+; GCN-NOT: s13
+; GCN: s_mov_b32 s12, s6
+; GCN-NEXT: s_mov_b32 s14, s7
+; GCN-NOT: s13
+
+; GCN: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
+ call void @use_workgroup_id_xz()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
+
+; GCN: s_mov_b32 s13, s7
+; GCN: s_mov_b32 s14, s8
+
+; GCN: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
+ call void @use_workgroup_id_yz()
+ ret void
+}
+
; Argument is in right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s12
@@ -197,6 +376,56 @@ define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
ret void
}
+; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
+
+; GCN-NOT: s13
+; GCN-NOT: s14
+; GCN-DAG: s_mov_b32 s12, s6
+; GCN-DAG: v_mov_b32_e32 v0, 0x22b
+; GCN-NOT: s13
+; GCN-NOT: s14
+
+; GCN-DAG: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
+define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
+ call void @other_arg_use_workgroup_id_x(i32 555)
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
+; GCN-DAG: v_mov_b32_e32 v0, 0x22b
+; GCN-DAG: s_mov_b32 s13, s7
+
+; GCN-DAG: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
+define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
+ call void @other_arg_use_workgroup_id_y(i32 555)
+ ret void
+}
+
+; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
+; GCN-DAG: v_mov_b32_e32 v0, 0x22b
+; GCN-DAG: s_mov_b32 s14, s7
+
+; GCN: s_mov_b32 s32, 0
+; GCN: s_swappc_b64
+
+; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
+; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
+define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
+ call void @other_arg_use_workgroup_id_z(i32 555)
+ ret void
+}
+
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
deleted file mode 100644
index 19df6c23f484..000000000000
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ /dev/null
@@ -1,616 +0,0 @@
-; RUN: llc -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s
-
-; GCN-LABEL: {{^}}use_dispatch_ptr:
-; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
-define hidden void @use_dispatch_ptr() #1 {
- %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
- %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
- %value = load volatile i32, i32 addrspace(4)* %header_ptr
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
-; GCN: enable_sgpr_dispatch_ptr = 1
-; GCN-NOT: s[4:5]
-; GCN-NOT: s4
-; GCN-NOT: s5
-define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
- call void @use_dispatch_ptr()
- ret void
-}
-
-; GCN-LABEL: {{^}}use_queue_ptr:
-; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
-define hidden void @use_queue_ptr() #1 {
- %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
- %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
- %value = load volatile i32, i32 addrspace(4)* %header_ptr
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
-; GCN: enable_sgpr_queue_ptr = 1
-; GCN-NOT: s[4:5]
-; GCN-NOT: s4
-; GCN-NOT: s5
-define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
- call void @use_queue_ptr()
- ret void
-}
-
-; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
-; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x10
-; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
-; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
-; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
-; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
-; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
-define hidden void @use_queue_ptr_addrspacecast() #1 {
- %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
- store volatile i32 0, i32* %asc
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
-; CIVI: enable_sgpr_queue_ptr = 1
-; CIVI-NOT: s[4:5]
-; CIVI-NOT: s4
-; CIVI-NOT: s5
-define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
- call void @use_queue_ptr_addrspacecast()
- ret void
-}
-
-; Not really supported in callable functions.
-; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
-; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0{{$}}
-; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0{{$}}
-define hidden void @use_kernarg_segment_ptr() #1 {
- %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
- %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
- %value = load volatile i32, i32 addrspace(4)* %header_ptr
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
-define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
- call void @use_kernarg_segment_ptr()
- ret void
-}
-
-; GCN-LABEL: {{^}}use_dispatch_id:
-; GCN: ; use s[4:5]
-define hidden void @use_dispatch_id() #1 {
- %id = call i64 @llvm.amdgcn.dispatch.id()
- call void asm sideeffect "; use $0", "s"(i64 %id)
- ret void
-}
-
-; No kernarg segment so that there is a mov to check. With kernarg
-; pointer enabled, it happens to end up in the right place anyway.
-
-; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
-; GCN: enable_sgpr_dispatch_id = 1
-; GCN-NOT: s[4:5]
-; GCN-NOT: s4
-; GCN-NOT: s5
-define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
- call void @use_dispatch_id()
- ret void
-}
-
-; GCN-LABEL: {{^}}use_workgroup_id_x:
-; GCN: s_waitcnt
-; GCN: ; use s4
-define hidden void @use_workgroup_id_x() #1 {
- %val = call i32 @llvm.amdgcn.workgroup.id.x()
- call void asm sideeffect "; use $0", "s"(i32 %val)
- ret void
-}
-
-; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
-; GCN: s_waitcnt
-; GCN-NOT: s32
-; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
-; GCN: ; use s4
-; GCN: s_setpc_b64
-define hidden void @use_stack_workgroup_id_x() #1 {
- %alloca = alloca i32, addrspace(5)
- store volatile i32 0, i32 addrspace(5)* %alloca
- %val = call i32 @llvm.amdgcn.workgroup.id.x()
- call void asm sideeffect "; use $0", "s"(i32 %val)
- ret void
-}
-
-; GCN-LABEL: {{^}}use_workgroup_id_y:
-; GCN: s_waitcnt
-; GCN: ; use s4
-define hidden void @use_workgroup_id_y() #1 {
- %val = call i32 @llvm.amdgcn.workgroup.id.y()
- call void asm sideeffect "; use $0", "s"(i32 %val)
- ret void
-}
-
-; GCN-LABEL: {{^}}use_workgroup_id_z:
-; GCN: s_waitcnt
-; GCN: ; use s4
-define hidden void @use_workgroup_id_z() #1 {
- %val = call i32 @llvm.amdgcn.workgroup.id.z()
- call void asm sideeffect "; use $0", "s"(i32 %val)
- ret void
-}
-
-; GCN-LABEL: {{^}}use_workgroup_id_xy:
-; GCN: ; use s4
-; GCN: ; use s5
-define hidden void @use_workgroup_id_xy() #1 {
- %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
- %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
- call void asm sideeffect "; use $0", "s"(i32 %val0)
- call void asm sideeffect "; use $0", "s"(i32 %val1)
- ret void
-}
-
-; GCN-LABEL: {{^}}use_workgroup_id_xyz:
-; GCN: ; use s4
-; GCN: ; use s5
-; GCN: ; use s6
-define hidden void @use_workgroup_id_xyz() #1 {
- %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
- %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
- %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
- call void asm sideeffect "; use $0", "s"(i32 %val0)
- call void asm sideeffect "; use $0", "s"(i32 %val1)
- call void asm sideeffect "; use $0", "s"(i32 %val2)
- ret void
-}
-
-; GCN-LABEL: {{^}}use_workgroup_id_xz:
-; GCN: ; use s4
-; GCN: ; use s5
-define hidden void @use_workgroup_id_xz() #1 {
- %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
- %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
- call void asm sideeffect "; use $0", "s"(i32 %val0)
- call void asm sideeffect "; use $0", "s"(i32 %val1)
- ret void
-}
-
-; GCN-LABEL: {{^}}use_workgroup_id_yz:
-; GCN: ; use s4
-; GCN: ; use s5
-define hidden void @use_workgroup_id_yz() #1 {
- %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
- %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
- call void asm sideeffect "; use $0", "s"(i32 %val0)
- call void asm sideeffect "; use $0", "s"(i32 %val1)
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 0
-; GCN: enable_sgpr_workgroup_id_z = 0
-
-; GCN-NOT: s6
-; GCN: s_mov_b32 s4, s6
-; GCN: s_mov_b32 s32, 0
-; GCN: s_getpc_b64 s[6:7]
-; GCN-NEXT: s_add_u32 s6, s6, use_workgroup_id_x at rel32@lo+4
-; GCN-NEXT: s_addc_u32 s7, s7, use_workgroup_id_x at rel32@hi+12
-; GCN: s_swappc_b64
-; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
- call void @use_workgroup_id_x()
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 1
-; GCN: enable_sgpr_workgroup_id_z = 0
-
-; GCN: s_mov_b32 s4, s7
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
- call void @use_workgroup_id_y()
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 0
-; GCN: enable_sgpr_workgroup_id_z = 1
-
-; GCN: s_mov_b32 s4, s7
-
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
- call void @use_workgroup_id_z()
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 1
-; GCN: enable_sgpr_workgroup_id_z = 0
-
-; GCN: s_mov_b32 s5, s7
-; GCN: s_mov_b32 s4, s6
-
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
- call void @use_workgroup_id_xy()
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 1
-; GCN: enable_sgpr_workgroup_id_z = 1
-
-; GCN: s_mov_b32 s5, s7
-; GCN: s_mov_b32 s4, s6
-; GCN: s_mov_b32 s6, s8
-
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
- call void @use_workgroup_id_xyz()
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 0
-; GCN: enable_sgpr_workgroup_id_z = 1
-
-; GCN: s_mov_b32 s5, s7
-; GCN: s_mov_b32 s4, s6
-
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
- call void @use_workgroup_id_xz()
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 1
-; GCN: enable_sgpr_workgroup_id_z = 1
-
-; GCN: s_mov_b32 s5, s8
-; GCN: s_mov_b32 s4, s7
-
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
- call void @use_workgroup_id_yz()
- ret void
-}
-
-; Argument is in right place already
-; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
-; GCN-NOT: s4
-; GCN: v_readlane_b32 s4, v40, 0
-define hidden void @func_indirect_use_workgroup_id_x() #1 {
- call void @use_workgroup_id_x()
- ret void
-}
-
-; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
-; GCN-NOT: s4
-; GCN: v_readlane_b32 s4, v40, 0
-define hidden void @func_indirect_use_workgroup_id_y() #1 {
- call void @use_workgroup_id_y()
- ret void
-}
-
-; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
-; GCN-NOT: s4
-; GCN: v_readlane_b32 s4, v40, 0
-define hidden void @func_indirect_use_workgroup_id_z() #1 {
- call void @use_workgroup_id_z()
- ret void
-}
-
-; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN: ; use s4
-define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
- %val = call i32 @llvm.amdgcn.workgroup.id.x()
- store volatile i32 %arg0, i32 addrspace(1)* undef
- call void asm sideeffect "; use $0", "s"(i32 %val)
- ret void
-}
-
-; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN: ; use s4
-define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
- %val = call i32 @llvm.amdgcn.workgroup.id.y()
- store volatile i32 %arg0, i32 addrspace(1)* undef
- call void asm sideeffect "; use $0", "s"(i32 %val)
- ret void
-}
-
-; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN: ; use s4
-define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
- %val = call i32 @llvm.amdgcn.workgroup.id.z()
- store volatile i32 %arg0, i32 addrspace(1)* undef
- call void asm sideeffect "; use $0", "s"(i32 %val)
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 0
-; GCN: enable_sgpr_workgroup_id_z = 0
-
-; GCN-DAG: v_mov_b32_e32 v0, 0x22b
-; GCN-DAG: s_mov_b32 s4, s6
-
-; GCN-DAG: s_mov_b32 s32, 0
-; GCN-NOT: s4
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
- call void @other_arg_use_workgroup_id_x(i32 555)
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 1
-; GCN: enable_sgpr_workgroup_id_z = 0
-
-; GCN-DAG: v_mov_b32_e32 v0, 0x22b
-; GCN-DAG: s_mov_b32 s4, s7
-
-; GCN-DAG: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
- call void @other_arg_use_workgroup_id_y(i32 555)
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 0
-; GCN: enable_sgpr_workgroup_id_z = 1
-
-; GCN-DAG: v_mov_b32_e32 v0, 0x22b
-
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
- call void @other_arg_use_workgroup_id_z(i32 555)
- ret void
-}
-
-; GCN-LABEL: {{^}}use_every_sgpr_input:
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
-; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
-; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
-
-; GCN: ; use s[10:11]
-; GCN: ; use s12
-; GCN: ; use s13
-; GCN: ; use s14
-define hidden void @use_every_sgpr_input() #1 {
- %alloca = alloca i32, align 4, addrspace(5)
- store volatile i32 0, i32 addrspace(5)* %alloca
-
- %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
- %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
- %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
-
- %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
- %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
- %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
-
- %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
- %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc
-
- %val3 = call i64 @llvm.amdgcn.dispatch.id()
- call void asm sideeffect "; use $0", "s"(i64 %val3)
-
- %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
- call void asm sideeffect "; use $0", "s"(i32 %val4)
-
- %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
- call void asm sideeffect "; use $0", "s"(i32 %val5)
-
- %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
- call void asm sideeffect "; use $0", "s"(i32 %val6)
-
- ret void
-}
-
-; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 1
-; GCN: enable_sgpr_workgroup_id_z = 1
-; GCN: enable_sgpr_workgroup_info = 0
-
-; GCN: enable_sgpr_private_segment_buffer = 1
-; GCN: enable_sgpr_dispatch_ptr = 1
-; GCN: enable_sgpr_queue_ptr = 1
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; GCN: enable_sgpr_dispatch_id = 1
-; GCN: enable_sgpr_flat_scratch_init = 1
-
-; GCN: s_mov_b32 s13, s15
-; GCN: s_mov_b32 s12, s14
-; GCN: s_mov_b32 s14, s16
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 {
- call void @use_every_sgpr_input()
- ret void
-}
-
-; We have to pass the kernarg segment, but there are no kernel
-; arguments so null is passed.
-; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input_no_kernargs:
-; GCN: enable_sgpr_workgroup_id_x = 1
-; GCN: enable_sgpr_workgroup_id_y = 1
-; GCN: enable_sgpr_workgroup_id_z = 1
-; GCN: enable_sgpr_workgroup_info = 0
-
-; GCN: enable_sgpr_private_segment_buffer = 1
-; GCN: enable_sgpr_dispatch_ptr = 1
-; GCN: enable_sgpr_queue_ptr = 1
-; GCN: enable_sgpr_kernarg_segment_ptr = 0
-; GCN: enable_sgpr_dispatch_id = 1
-; GCN: enable_sgpr_flat_scratch_init = 1
-
-; GCN: s_mov_b64 s[10:11], s[8:9]
-; GCN: s_mov_b64 s[8:9], 0{{$}}
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #2 {
- call void @use_every_sgpr_input()
- ret void
-}
-
-; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
-; GCN-NOT: s6
-; GCN-NOT: s7
-; GCN-NOT: s8
-; GCN-NOT: s9
-; GCN-NOT: s10
-; GCN-NOT: s11
-; GCN-NOT: s12
-; GCN-NOT: s13
-; GCN-NOT: s[6:7]
-; GCN-NOT: s[8:9]
-; GCN-NOT: s[10:11]
-; GCN-NOT: s[12:13]
-; GCN: s_or_saveexec_b64 s[16:17], -1
-define hidden void @func_indirect_use_every_sgpr_input() #1 {
- call void @use_every_sgpr_input()
- ret void
-}
-
-; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
-; GCN: s_mov_b32 s4, s12
-; GCN: s_mov_b32 s5, s13
-; GCN: s_mov_b32 s6, s14
-; GCN: ; use s[10:11]
-; GCN: ; use s12
-; GCN: ; use s13
-; GCN: ; use s14
-
-; GCN: s_swappc_b64
-define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
- %alloca = alloca i32, align 4, addrspace(5)
- store volatile i32 0, i32 addrspace(5)* %alloca
-
- %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
- %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
- %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
-
- %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
- %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
- %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
-
- %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
- %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc
-
- %val3 = call i64 @llvm.amdgcn.dispatch.id()
- call void asm sideeffect "; use $0", "s"(i64 %val3)
-
- %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
- call void asm sideeffect "; use $0", "s"(i32 %val4)
-
- %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
- call void asm sideeffect "; use $0", "s"(i32 %val5)
-
- %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
- call void asm sideeffect "; use $0", "s"(i32 %val6)
-
- call void @use_workgroup_id_xyz()
- ret void
-}
-
-; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
-; GCN-DAG: s_mov_b32 s33, s32
-; GCN-DAG: s_addk_i32 s32, 0x400
-; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5]
-; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7]
-
-; GCN: s_mov_b32 s4, s12
-; GCN: s_mov_b32 s5, s13
-; GCN: s_mov_b32 s6, s14
-
-; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-68-9][0-9]*]], s14
-; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-57-9][0-9]*]], s13
-; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s12
-; GCN: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[8:9]
-
-; GCN: s_swappc_b64
-
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
-; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_X]]:[[HI_X]]{{\]}}, 0x0
-; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Y]]:[[HI_Y]]{{\]}}, 0x0
-; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Z]]:[[HI_Z]]{{\]}}, 0x0
-; GCN: ; use
-; GCN: ; use [[SAVE_X]]
-; GCN: ; use [[SAVE_Y]]
-; GCN: ; use [[SAVE_Z]]
-define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
- %alloca = alloca i32, align 4, addrspace(5)
- call void @use_workgroup_id_xyz()
-
- store volatile i32 0, i32 addrspace(5)* %alloca
-
- %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
- %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
- %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
-
- %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
- %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
- %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
-
- %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
- %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc
-
- %val3 = call i64 @llvm.amdgcn.dispatch.id()
- call void asm sideeffect "; use $0", "s"(i64 %val3)
-
- %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
- call void asm sideeffect "; use $0", "s"(i32 %val4)
-
- %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
- call void asm sideeffect "; use $0", "s"(i32 %val5)
-
- %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
- call void asm sideeffect "; use $0", "s"(i32 %val6)
-
- ret void
-}
-
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
-declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
-declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
-declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind noinline }
-attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index f8277361757d..c63830a00a01 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -1,9 +1,7 @@
-; RUN: llc -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VARABI %s
-; RUN: llc -amdgpu-fixed-function-abi=1 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s
; GCN-LABEL: {{^}}use_workitem_id_x:
; GCN: s_waitcnt
-; VARABI: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v0
; FIXEDABI: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
@@ -16,7 +14,6 @@ define void @use_workitem_id_x() #1 {
; GCN-LABEL: {{^}}use_workitem_id_y:
; GCN: s_waitcnt
-; VARABI: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
@@ -29,7 +26,6 @@ define void @use_workitem_id_y() #1 {
; GCN-LABEL: {{^}}use_workitem_id_z:
; GCN: s_waitcnt
-; VARABI: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
@@ -42,9 +38,6 @@ define void @use_workitem_id_z() #1 {
; GCN-LABEL: {{^}}use_workitem_id_xy:
; GCN: s_waitcnt
-; VARABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
-; VARABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
-
; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
@@ -63,10 +56,6 @@ define void @use_workitem_id_xy() #1 {
; GCN-LABEL: {{^}}use_workitem_id_xyz:
; GCN: s_waitcnt
-; VARABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
-; VARABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
-; VARABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
-
; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
@@ -89,9 +78,6 @@ define void @use_workitem_id_xyz() #1 {
; GCN-LABEL: {{^}}use_workitem_id_xz:
; GCN: s_waitcnt
-; VARABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
-; VARABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
-
; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
@@ -109,9 +95,6 @@ define void @use_workitem_id_xz() #1 {
; GCN-LABEL: {{^}}use_workitem_id_yz:
; GCN: s_waitcnt
-; VARABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
-; VARABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
-
; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
@@ -136,9 +119,7 @@ define void @use_workitem_id_yz() #1 {
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v31
-; VARABI-NOT: v31
; GCN: s_swappc_b64
-; VARABI-NOT: v31
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
call void @use_workitem_id_x()
ret void
@@ -147,9 +128,6 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1
-; VARABI-NOT: v31
-; VARABI: v_lshlrev_b32_e32 v0, 10, v1
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
@@ -158,8 +136,6 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
-; VARABI-NOT: v31
-
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
call void @use_workitem_id_y()
@@ -169,12 +145,6 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2
-; VARABI-NOT: v0
-; VARABI-NOT: v2
-; VARABI: v_lshlrev_b32_e32 v0, 20, v2
-; VARABI-NOT: v0
-; VARABI-NOT: v1
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
@@ -188,13 +158,6 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
}
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
-; VARABI-NOT: v0
-; VARABI-NOT: v1
-; VARABI: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; VARABI: v_or_b32_e32 v0, v0, [[IDY]]
-; VARABI-NOT: v0
-; VARABI-NOT: v1
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
@@ -211,14 +174,6 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
}
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
-; VARABI-NOT: v0
-; VARABI-NOT: v2
-; VARABI: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; VARABI: v_or_b32_e32 v0, v0, [[IDZ]]
-; VARABI-NOT: v0
-; VARABI-NOT: v2
-
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
@@ -235,14 +190,6 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
}
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
-; VARABI-NOT: v1
-; VARABI-NOT: v2
-; VARABI-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; VARABI-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; VARABI: v_or_b32_e32 v0, [[IDY]], [[IDZ]]
-; VARABI-NOT: v1
-; VARABI-NOT: v2
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
@@ -260,17 +207,6 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
}
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
-; VARABI-NOT: v0
-; VARABI-NOT: v1
-; VARABI-NOT: v2
-; VARABI-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; VARABI-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; VARABI-DAG: v_or_b32_e32 v0, v0, [[IDY]]
-; VARABI-DAG: v_or_b32_e32 v0, v0, [[IDZ]]
-; VARABI-NOT: v0
-; VARABI-NOT: v1
-; VARABI-NOT: v2
-
; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
@@ -311,7 +247,6 @@ define void @func_indirect_use_workitem_id_z() #1 {
; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
; GCN: s_waitcnt
-; VARABI-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v1
; FIXEDABI-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
@@ -325,7 +260,6 @@ define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
; GCN: s_waitcnt
-; VARABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 10, 10
; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
@@ -338,7 +272,6 @@ define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
; GCN: s_waitcnt
-; VARABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 20, 10
; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
@@ -353,9 +286,6 @@ define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0
-; VARABI: v_mov_b32_e32 v1, v0
-; VARABI: v_mov_b32_e32 v0, 0x22b
-
; FIXEDABI-NOT: v0
; FIXEDABI: v_mov_b32_e32 v31, v0
; FIXEDABI: v_mov_b32_e32 v0, 0x22b
@@ -370,13 +300,6 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1
-; VARABI: v_lshlrev_b32_e32 v1, 10, v1
-; VARABI-NOT: v1
-; VARABI: v_mov_b32_e32 v0, 0x22b
-; VARABI-NOT: v1
-; VARABI: s_swappc_b64
-; VARABI-NOT: v0
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
@@ -390,11 +313,6 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2
-; VARABI-DAG: v_mov_b32_e32 v0, 0x22b
-; VARABI-DAG: v_lshlrev_b32_e32 v1, 20, v2
-; VARABI: s_swappc_b64
-; VARABI-NOT: v0
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
@@ -406,11 +324,6 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
}
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
-; VARABI: buffer_load_dword v32, off, s[0:3], s32{{$}}
-; VARABI: v_and_b32_e32 v32, 0x3ff, v32
-; VARABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
-; VARABI: s_setpc_b64
-
; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31
; FIXEDABI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
define void @too_many_args_use_workitem_id_x(
@@ -463,11 +376,6 @@ define void @too_many_args_use_workitem_id_x(
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0
-; VARABI: s_mov_b32 s32, 0
-; VARABI: buffer_store_dword v0, off, s[0:3], s32{{$}}
-; VARABI: s_swappc_b64
-
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
@@ -491,8 +399,6 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
}
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
-; VARABI: s_mov_b32 s33, s32
-; VARABI: buffer_store_dword v1, off, s[0:3], s32{{$}}
; Touching the workitem id register is not necessary.
; FIXEDABI-NOT: v31
@@ -548,13 +454,6 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x(
; frame[2] = VGPR spill slot
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
-; VARABI: buffer_load_dword v32, off, s[0:3], s32 offset:4
-; VARABI-NEXT: s_waitcnt
-; VARABI-NEXT: v_and_b32_e32 v32, 0x3ff, v32
-; VARABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
-; VARABI: buffer_load_dword v0, off, s[0:3], s32 glc{{$}}
-; VARABI: s_setpc_b64
-
; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31
; FIXEDABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
@@ -616,17 +515,6 @@ define void @too_many_args_use_workitem_id_x_byval(
; sp[2] = stack passed workitem ID x
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
-; VARABI: enable_vgpr_workitem_id = 0
-; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
-; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
-; VARABI: s_movk_i32 s32, 0x400{{$}}
-; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
-
-; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
-; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
-; VARABI: s_swappc_b64
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
@@ -662,15 +550,6 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
}
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
-; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
-; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
-; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
-; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
-; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
-; VARABI: s_swappc_b64
-
-
; FIXED-ABI-NOT: v31
; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7{{$}}
; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s33{{$}}
@@ -699,21 +578,6 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
}
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
-; VARABI-NOT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
-; VARABI: buffer_load_dword v32, off, s[0:3], s32{{$}}
-; VARABI-NOT: buffer_load_dword
-
-; VARABI: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v32
-; VARABI-NOT: buffer_load_dword
-; VARABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]]
-; VARABI-NOT: buffer_load_dword
-; VARABI: v_bfe_u32 [[BFE_Y:v[0-9]+]], v32, 10, 10
-; VARABI-NEXT: v_bfe_u32 [[BFE_Z:v[0-9]+]], v32, 20, 10
-; VARABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Y]]
-; VARABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Z]]
-; VARABI: s_setpc_b64
-
-
; FIXEDABI: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v31
; FIXEDABI-NOT: buffer_load_dword
; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]]
@@ -783,8 +647,6 @@ define void @too_many_args_use_workitem_id_xyz(
; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140
; GCN-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
; GCN-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; VARABI-DAG: v_or_b32_e32 [[PACKEDID:v[0-9]+]], [[TMP2]], [[TMP0]]
-; VARABI: buffer_store_dword [[PACKEDID]], off, s[0:3], s32{{$}}
; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
index 2a0657064cfa..121d5d35bfe7 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
@@ -12,25 +12,25 @@ define float @fdiv_f32(float %a, float %b) #0 {
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %13:vgpr_32, %14:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %15:vgpr_32, %16:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %15, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
- ; GCN-NEXT: %21:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %15, 0, %17, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %22:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %21, 0, %17, 0, %17, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %23:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %13, 0, %22, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %24:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %15, 0, %23, 0, %13, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %25:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %24, 0, %22, 0, %23, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %26:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %15, 0, %25, 0, %13, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode
- ; GCN-NEXT: $vcc = COPY %14
- ; GCN-NEXT: %27:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %26, 0, %22, 0, %25, 0, 0, implicit $mode, implicit $vcc, implicit $exec
- ; GCN-NEXT: %28:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %27, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vcc = COPY %7
+ ; GCN-NEXT: %20:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+ ; GCN-NEXT: %21:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: $vgpr0 = COPY %28
+ ; GCN-NEXT: $vgpr0 = COPY %21
; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
entry:
@@ -46,25 +46,25 @@ define float @fdiv_nnan_f32(float %a, float %b) #0 {
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %13:vgpr_32, %14:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %15:vgpr_32, %16:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %15, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
- ; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %15, 0, %17, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %22:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %21, 0, %17, 0, %17, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %23:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %13, 0, %22, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %24:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %15, 0, %23, 0, %13, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %25:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %24, 0, %22, 0, %23, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %26:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %15, 0, %25, 0, %13, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode
- ; GCN-NEXT: $vcc = COPY %14
- ; GCN-NEXT: %27:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %26, 0, %22, 0, %25, 0, 0, implicit $mode, implicit $vcc, implicit $exec
- ; GCN-NEXT: %28:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %27, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vcc = COPY %7
+ ; GCN-NEXT: %20:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+ ; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: $vgpr0 = COPY %28
+ ; GCN-NEXT: $vgpr0 = COPY %21
; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index 8a1066fb7d02..a47606e14745 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4
@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/returnaddress.ll b/llvm/test/CodeGen/AMDGPU/returnaddress.ll
index 7937ba63c43b..1c0139c4e125 100644
--- a/llvm/test/CodeGen/AMDGPU/returnaddress.ll
+++ b/llvm/test/CodeGen/AMDGPU/returnaddress.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
-; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; Test with zero frame
; GCN-LABEL: {{^}}func1
More information about the llvm-commits
mailing list