[llvm] r309781 - AMDGPU: Analyze callee resource usage in AsmPrinter
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 1 18:31:28 PDT 2017
Author: arsenm
Date: Tue Aug 1 18:31:28 2017
New Revision: 309781
URL: http://llvm.org/viewvc/llvm-project?rev=309781&view=rev
Log:
AMDGPU: Analyze callee resource usage in AsmPrinter
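With calls enabled, a function's reported register counts and scratch size
must cover everything it may call. Functions are codegened in call graph SCC
order (callees before callers), so the AsmPrinter records a
SIFunctionResourceInfo per function and folds each callee's cached info into
its caller: register counts take the maximum, flag uses (vcc, flat_scratch)
are ORed in, and the largest callee frame is added to the caller's private
segment size. Calls to external declarations get conservative guesses and
mark the call stack as dynamically sized.

A standalone sketch of the merge rule (an illustrative model with simplified
names and types, not the patch's code):

  #include <algorithm>
  #include <cstdint>
  #include <vector>

  struct ResourceInfo {
    int32_t NumSGPR = 0;
    int32_t NumVGPR = 0;
    uint32_t PrivateSegmentSize = 0; // bytes of the function's own frame
    bool UsesVCC = false;
  };

  // Callees are emitted first, so their info is final when the caller asks.
  ResourceInfo mergeCallees(ResourceInfo Caller,
                            const std::vector<ResourceInfo> &Callees) {
    uint32_t MaxCalleeFrame = 0;
    for (const ResourceInfo &C : Callees) {
      Caller.NumSGPR = std::max(Caller.NumSGPR, C.NumSGPR); // registers: max
      Caller.NumVGPR = std::max(Caller.NumVGPR, C.NumVGPR);
      Caller.UsesVCC |= C.UsesVCC;                          // flags: OR
      MaxCalleeFrame = std::max(MaxCalleeFrame, C.PrivateSegmentSize);
    }
    // Frames of different callees reuse the same stack space, so only the
    // largest one contributes, e.g. max(2052, 404) = 2052 in the multi-call
    // test added below.
    Caller.PrivateSegmentSize += MaxCalleeFrame;
    return Caller;
  }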
Added:
llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=309781&r1=309780&r2=309781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Aug 1 18:31:28 2017
@@ -509,20 +509,154 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
}
}
- MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
- for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
- if (MRI.isPhysRegUsed(Reg)) {
- HighestSGPRReg = Reg;
- break;
+ int32_t MaxVGPR = -1;
+ int32_t MaxSGPR = -1;
+ uint32_t CalleeFrameSize = 0;
+
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ // TODO: Check regmasks? Do they occur anywhere except calls?
+ for (const MachineOperand &MO : MI.operands()) {
+ unsigned Width = 0;
+ bool IsSGPR = false;
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ switch (Reg) {
+ case AMDGPU::EXEC:
+ case AMDGPU::EXEC_LO:
+ case AMDGPU::EXEC_HI:
+ case AMDGPU::SCC:
+ case AMDGPU::M0:
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
+ continue;
+
+ case AMDGPU::NoRegister:
+ assert(MI.isDebugValue());
+ continue;
+
+ case AMDGPU::VCC:
+ case AMDGPU::VCC_LO:
+ case AMDGPU::VCC_HI:
+ Info.UsesVCC = true;
+ continue;
+
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::FLAT_SCR_LO:
+ case AMDGPU::FLAT_SCR_HI:
+ continue;
+
+ case AMDGPU::TBA:
+ case AMDGPU::TBA_LO:
+ case AMDGPU::TBA_HI:
+ case AMDGPU::TMA:
+ case AMDGPU::TMA_LO:
+ case AMDGPU::TMA_HI:
+ llvm_unreachable("trap handler registers should not be used");
+
+ default:
+ break;
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(Reg)) {
+ assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
+ "trap handler registers should not be used");
+ IsSGPR = true;
+ Width = 1;
+ } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 1;
+ } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
+ assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
+ "trap handler registers should not be used");
+ IsSGPR = true;
+ Width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 2;
+ } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 3;
+ } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 4;
+ } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 8;
+ } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 8;
+ } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 16;
+ } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 16;
+ } else {
+ llvm_unreachable("Unknown register class");
+ }
+ unsigned HWReg = TRI.getHWRegIndex(Reg);
+ int MaxUsed = HWReg + Width - 1;
+ if (IsSGPR) {
+ MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+ } else {
+ MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
+ }
+ }
+
+ if (MI.isCall()) {
+ assert(MI.getOpcode() == AMDGPU::SI_CALL);
+ // Pseudo used just to encode the underlying global. Is there a better
+ // way to track this?
+ const Function *Callee = cast<Function>(MI.getOperand(2).getGlobal());
+ if (Callee->isDeclaration()) {
+ // If this is a call to an external function, we can't do much. Make
+ // conservative guesses.
+
+        // 48 SGPRs, minus the extra SGPRs (vcc, flat_scr, xnack).
+ int MaxSGPRGuess = 47 - getNumExtraSGPRs(ST, true,
+ ST.hasFlatAddressSpace());
+ MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
+ MaxVGPR = std::max(MaxVGPR, 23);
+
+ CalleeFrameSize = std::max(CalleeFrameSize, 16384u);
+ Info.UsesVCC = true;
+ Info.UsesFlatScratch = ST.hasFlatAddressSpace();
+ Info.HasDynamicallySizedStack = true;
+ } else {
+        // We force CodeGen to run in SCC order, so the callee's register
+        // usage etc. already includes the usage of everything it calls.
+ auto I = CallGraphResourceInfo.find(Callee);
+ assert(I != CallGraphResourceInfo.end() &&
+ "callee should have been handled before caller");
+
+ MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
+ MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
+ CalleeFrameSize
+ = std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
+ Info.UsesVCC |= I->second.UsesVCC;
+ Info.UsesFlatScratch |= I->second.UsesFlatScratch;
+ Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
+ Info.HasRecursion |= I->second.HasRecursion;
+ }
+
+ if (!Callee->doesNotRecurse())
+ Info.HasRecursion = true;
+ }
}
}
- // We found the maximum register index. They start at 0, so add one to get the
- // number of registers.
- Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
- TRI.getHWRegIndex(HighestVGPRReg) + 1;
- Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
- TRI.getHWRegIndex(HighestSGPRReg) + 1;
+ Info.NumExplicitSGPR = MaxSGPR + 1;
+ Info.NumVGPR = MaxVGPR + 1;
+ Info.PrivateSegmentSize += CalleeFrameSize;
return Info;
}
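A worked instance of the width computation above (a standalone illustration,
not the committed code): an operand in SReg_128 whose low hardware index is 4
occupies s[4:7], so MaxUsed = HWReg + Width - 1 = 7, and the reported count
becomes 8.

  #include <algorithm>
  #include <cassert>

  int main() {
    int MaxSGPR = -1;         // -1 means no SGPR seen yet
    int HWReg = 4, Width = 4; // an SReg_128 use starting at s4
    MaxSGPR = std::max(MaxSGPR, HWReg + Width - 1);
    assert(MaxSGPR == 7);     // s4..s7 are in use
    int NumExplicitSGPR = MaxSGPR + 1; // indices start at 0, so count is 8
    return NumExplicitSGPR == 8 ? 0 : 1;
  }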
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp?rev=309781&r1=309780&r2=309781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp Tue Aug 1 18:31:28 2017
@@ -135,6 +135,11 @@ void AMDGPUMCInstLower::lower(const Mach
// do that with a single pseudo source operation.
if (Opcode == AMDGPU::S_SETPC_B64_return)
Opcode = AMDGPU::S_SETPC_B64;
+ else if (Opcode == AMDGPU::SI_CALL) {
+ // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
+ // called function.
+ Opcode = AMDGPU::S_SWAPPC_B64;
+ }
int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(Opcode);
if (MCOpcode == -1) {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=309781&r1=309780&r2=309781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Tue Aug 1 18:31:28 2017
@@ -486,7 +486,10 @@ public:
class GCNPassConfig final : public AMDGPUPassConfig {
public:
GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
- : AMDGPUPassConfig(TM, PM) {}
+ : AMDGPUPassConfig(TM, PM) {
+ // It is necessary to know the register usage of the entire call graph.
+ setRequiresCodeGenSCCOrder(EnableAMDGPUFunctionCalls);
+ }
GCNTargetMachine &getGCNTargetMachine() const {
return getTM<GCNTargetMachine>();
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=309781&r1=309780&r2=309781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Aug 1 18:31:28 2017
@@ -2650,14 +2650,27 @@ MachineBasicBlock *SITargetLowering::Emi
.addReg(Info->getStackPtrOffsetReg(), RegState::Implicit);
return BB;
}
- case AMDGPU::SI_CALL: {
+ case AMDGPU::SI_CALL_ISEL: {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
unsigned ReturnAddrReg = TII->getRegisterInfo().getReturnAddressReg(*MF);
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned GlobalAddrReg = MI.getOperand(0).getReg();
+ MachineInstr *PCRel = MRI.getVRegDef(GlobalAddrReg);
+ assert(PCRel->getOpcode() == AMDGPU::SI_PC_ADD_REL_OFFSET);
+
+ const GlobalValue *G = PCRel->getOperand(1).getGlobal();
+
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_SWAPPC_B64), ReturnAddrReg);
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg)
+ .add(MI.getOperand(0))
+ .addGlobalAddress(G);
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
MIB.add(MI.getOperand(I));
+
MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
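Together with the SIInstructions.td change below, the call site now passes
through three forms (schematic only; operand details are simplified):

  isel:        SI_CALL_ISEL of the address defined by SI_PC_ADD_REL_OFFSET @callee
  inserter:    SI_CALL addr, @callee, with the return-address pair as an explicit def
  MC lowering: s_swappc_b64, the extra @callee operand having served only as bookkeeping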
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=309781&r1=309780&r2=309781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Tue Aug 1 18:31:28 2017
@@ -327,16 +327,28 @@ def SI_RETURN : SPseudoInstSI <
let SchedRW = [WriteBranch];
}
-// Return for returning function calls.
-def SI_CALL : SPseudoInstSI <
- (outs), (ins SSrc_b64:$src0), [(AMDGPUcall i64:$src0)],
- "; call $src0"> {
+// Call pseudo for calls to returning functions, without an output register.
+//
+// This version is only needed so we can fill in the output register in
+// the custom inserter.
+def SI_CALL_ISEL : SPseudoInstSI <
+ (outs), (ins SSrc_b64:$src0), [(AMDGPUcall i64:$src0)]> {
let Size = 4;
let isCall = 1;
let SchedRW = [WriteBranch];
let usesCustomInserter = 1;
}
+// Wrapper around s_swappc_b64 with extra $callee parameter to track
+// the called function after regalloc.
+def SI_CALL : SPseudoInstSI <
+ (outs SReg_64:$dst), (ins SSrc_b64:$src0, unknown:$callee)> {
+ let Size = 4;
+ let isCall = 1;
+ let SchedRW = [WriteBranch];
+}
+
def ADJCALLSTACKUP : SPseudoInstSI<
(outs), (ins i32imm:$amt0, i32imm:$amt1),
[(callseq_start timm:$amt0, timm:$amt1)],
Added: llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll?rev=309781&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll Tue Aug 1 18:31:28 2017
@@ -0,0 +1,230 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
+
+; Make sure to also test a GPU with the SGPR allocation bug (iceland); such
+; targets always report the fixed limit of 96 SGPRs.
+
+; GCN-LABEL: {{^}}use_vcc:
+; GCN: ; NumSgprs: 34
+; GCN: ; NumVgprs: 0
+define void @use_vcc() #1 {
+ call void asm sideeffect "", "~{vcc}" () #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_use_vcc:
+; GCN: v_writelane_b32 v32, s33, 0
+; GCN: v_writelane_b32 v32, s34, 1
+; GCN: v_writelane_b32 v32, s35, 2
+; GCN: s_swappc_b64
+; GCN: v_readlane_b32 s35, v32, 2
+; GCN: v_readlane_b32 s34, v32, 1
+; GCN: v_readlane_b32 s33, v32, 0
+; GCN: ; NumSgprs: 38
+; GCN: ; NumVgprs: 33
+define void @indirect_use_vcc() #1 {
+ call void @use_vcc()
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
+; GCN: is_dynamic_callstack = 0
+; CI: ; NumSgprs: 40
+; VI-NOBUG: ; NumSgprs: 42
+; VI-BUG: ; NumSgprs: 96
+; GCN: ; NumVgprs: 33
+define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 {
+ call void @indirect_use_vcc()
+ ret void
+}
+
+; GCN-LABEL: {{^}}use_flat_scratch:
+; CI: ; NumSgprs: 36
+; VI: ; NumSgprs: 38
+; GCN: ; NumVgprs: 0
+define void @use_flat_scratch() #1 {
+ call void asm sideeffect "", "~{flat_scratch}" () #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_use_flat_scratch:
+; CI: ; NumSgprs: 40
+; VI: ; NumSgprs: 42
+; GCN: ; NumVgprs: 33
+define void @indirect_use_flat_scratch() #1 {
+ call void @use_flat_scratch()
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
+; GCN: is_dynamic_callstack = 0
+; CI: ; NumSgprs: 40
+; VI-NOBUG: ; NumSgprs: 42
+; VI-BUG: ; NumSgprs: 96
+; GCN: ; NumVgprs: 33
+define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 {
+ call void @indirect_use_flat_scratch()
+ ret void
+}
+
+; GCN-LABEL: {{^}}use_10_vgpr:
+; GCN: ; NumVgprs: 10
+define void @use_10_vgpr() #1 {
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
+ call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_use_10_vgpr:
+; GCN: ; NumVgprs: 33
+define void @indirect_use_10_vgpr() #0 {
+ call void @use_10_vgpr()
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
+; GCN: is_dynamic_callstack = 0
+; GCN: ; NumVgprs: 10
+define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
+ call void @indirect_use_10_vgpr()
+ ret void
+}
+
+; GCN-LABEL: {{^}}use_40_vgpr:
+; GCN: ; NumVgprs: 40
+define void @use_40_vgpr() #1 {
+ call void asm sideeffect "", "~{v39}"() #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_use_40_vgpr:
+; GCN: ; NumVgprs: 40
+define void @indirect_use_40_vgpr() #0 {
+ call void @use_40_vgpr()
+ ret void
+}
+
+; GCN-LABEL: {{^}}use_80_sgpr:
+; GCN: ; NumSgprs: 80
+define void @use_80_sgpr() #1 {
+ call void asm sideeffect "", "~{s79}"() #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_use_80_sgpr:
+; GCN: ; NumSgprs: 82
+define void @indirect_use_80_sgpr() #1 {
+ call void @use_80_sgpr()
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
+; GCN: is_dynamic_callstack = 0
+; CI: ; NumSgprs: 84
+; VI-NOBUG: ; NumSgprs: 86
+; VI-BUG: ; NumSgprs: 96
+define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
+ call void @indirect_use_80_sgpr()
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}use_stack0:
+; GCN: ScratchSize: 2052
+define void @use_stack0() #1 {
+ %alloca = alloca [512 x i32], align 4
+ call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}use_stack1:
+; GCN: ScratchSize: 404
+define void @use_stack1() #1 {
+ %alloca = alloca [100 x i32], align 4
+ call void asm sideeffect "; use $0", "v"([100 x i32]* %alloca) #0
+ ret void
+}
+
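+; The frames nest: indirect_use_stack's own 16 x i32 frame (plus a few bytes
+; of fixed overhead) sits on top of use_stack0's 2052 bytes, giving the 2120
+; bytes checked below.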
+; GCN-LABEL: {{^}}indirect_use_stack:
+; GCN: ScratchSize: 2120
+define void @indirect_use_stack() #1 {
+ %alloca = alloca [16 x i32], align 4
+ call void asm sideeffect "; use $0", "v"([16 x i32]* %alloca) #0
+ call void @use_stack0()
+ ret void
+}
+
+; GCN-LABEL: {{^}}indirect_2_level_use_stack:
+; GCN: is_dynamic_callstack = 0
+; GCN: ScratchSize: 2120
+define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
+ call void @indirect_use_stack()
+ ret void
+}
+
+
+; Should be the maximum of the callees' stack usage.
+; GCN-LABEL: {{^}}multi_call_use_use_stack:
+; GCN: is_dynamic_callstack = 0
+; GCN: ScratchSize: 2052
+define amdgpu_kernel void @multi_call_use_use_stack() #0 {
+ call void @use_stack0()
+ call void @use_stack1()
+ ret void
+}
+
+
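+; Calls to external declarations can't be analyzed, so the caller gets
+; conservative worst-case guesses (16384 bytes of scratch here) and is
+; marked as having a dynamically sized call stack.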
+declare void @external() #0
+
+; GCN-LABEL: {{^}}usage_external:
+; GCN: is_dynamic_callstack = 1
+; NumSgprs: 48
+; NumVgprs: 24
+; GCN: ScratchSize: 16384
+define amdgpu_kernel void @usage_external() #0 {
+ call void @external()
+ ret void
+}
+
+declare void @external_recurse() #2
+
+; GCN-LABEL: {{^}}usage_external_recurse:
+; GCN: is_dynamic_callstack = 1
+; NumSgprs: 48
+; NumVgprs: 24
+; GCN: ScratchSize: 16384
+define amdgpu_kernel void @usage_external_recurse() #0 {
+ call void @external_recurse()
+ ret void
+}
+
+; GCN-LABEL: {{^}}direct_recursion_use_stack:
+; GCN: ScratchSize: 2052
+define void @direct_recursion_use_stack(i32 %val) #2 {
+ %alloca = alloca [512 x i32], align 4
+ call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0
+ %cmp = icmp eq i32 %val, 0
+ br i1 %cmp, label %ret, label %call
+
+call:
+ %val.sub1 = sub i32 %val, 1
+ call void @direct_recursion_use_stack(i32 %val.sub1)
+ br label %ret
+
+ret:
+ ret void
+}
+
+; GCN-LABEL: {{^}}usage_direct_recursion:
+; GCN: is_ptr64 = 1
+; GCN: is_dynamic_callstack = 1
+; GCN: workitem_private_segment_byte_size = 2052
+define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
+ call void @direct_recursion_use_stack(i32 %n)
+ ret void
+}
+
+
+attributes #0 = { nounwind norecurse }
+attributes #1 = { nounwind noinline norecurse }
+attributes #2 = { nounwind noinline }
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll?rev=309781&r1=309780&r2=309781&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll Tue Aug 1 18:31:28 2017
@@ -3,9 +3,9 @@
; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
; IR: alloca [5 x i32]
-; ASM-LABEL: {{^}}promote_alloca_shaders:
-; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only)
+; ASM-LABEL: {{^}}promote_alloca_shaders:
+; ASM: ; ScratchSize: 24
define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
entry:
%stack = alloca [5 x i32], align 4
@@ -29,7 +29,10 @@ entry:
; OPT-LABEL: @promote_to_vector_call_c(
; OPT-NOT: alloca
; OPT: extractelement <2 x i32> %{{[0-9]+}}, i32 %in
+
+; ASM-LABEL: {{^}}promote_to_vector_call_c:
; ASM-NOT: LDSByteSize
+; ASM: ; ScratchSize: 0
define void @promote_to_vector_call_c(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
%tmp = alloca [2 x i32]
@@ -47,8 +50,11 @@ entry:
; OPT-LABEL: @no_promote_to_lds_c(
; OPT: alloca
+
+; ASM-LABEL: {{^}}no_promote_to_lds_c:
; ASM-NOT: LDSByteSize
-define void @no_promote_to_lds(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
+; ASM: ; ScratchSize: 24
+define void @no_promote_to_lds_c(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
%stack = alloca [5 x i32], align 4
%0 = load i32, i32 addrspace(1)* %in, align 4