[llvm] ccad5e7 - AMDGPU: Respect amdgpu-no-agpr in functions and with calls (#128147)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 22 18:00:41 PST 2025
Author: Matt Arsenault
Date: 2025-02-23T09:00:37+07:00
New Revision: ccad5e77442f7f237939395ebce1ae7adf187380
URL: https://github.com/llvm/llvm-project/commit/ccad5e77442f7f237939395ebce1ae7adf187380
DIFF: https://github.com/llvm/llvm-project/commit/ccad5e77442f7f237939395ebce1ae7adf187380.diff
LOG: AMDGPU: Respect amdgpu-no-agpr in functions and with calls (#128147)
Remove the MIR scan to detect whether AGPRs are used or not,
and the special case for callable functions. This behavior was
confusing, and not overridable. The amdgpu-no-agpr attribute was
intended to avoid this imprecise heuristic for how many AGPRs to
allocate. It was also too confusing to make this interact with
the pending amdgpu-num-agpr replacement for amdgpu-no-agpr.
Also add an xfail-ish test for a case I ran into, where the register
allocator asserts after allocation fails.
Future work should reintroduce a more refined MIR scan to estimate
AGPR pressure for how to split AGPRs and VGPRs.
Added:
llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
Modified:
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c5efb89d8b2db..a83fc2d188de2 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -64,6 +64,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
}
MayNeedAGPRs = ST.hasMAIInsts();
+ if (ST.hasGFX90AInsts() &&
+ ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
+ !mayUseAGPRs(F))
+ MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
if (AMDGPU::isChainCC(CC)) {
// Chain functions don't receive an SP from their caller, but are free to
@@ -98,13 +102,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
ImplicitArgPtr = true;
} else {
ImplicitArgPtr = false;
- MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
- MaxKernArgAlign);
-
- if (ST.hasGFX90AInsts() &&
- ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
- !mayUseAGPRs(F))
- MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
+ MaxKernArgAlign =
+ std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
}
if (!AMDGPU::isGraphics(CC) ||
@@ -783,44 +782,3 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
return !F.hasFnAttribute("amdgpu-no-agpr");
}
-
-bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
- if (UsesAGPRs)
- return *UsesAGPRs;
-
- if (!mayNeedAGPRs()) {
- UsesAGPRs = false;
- return false;
- }
-
- if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
- MF.getFrameInfo().hasCalls()) {
- UsesAGPRs = true;
- return true;
- }
-
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- const Register Reg = Register::index2VirtReg(I);
- const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
- if (RC && SIRegisterInfo::isAGPRClass(RC)) {
- UsesAGPRs = true;
- return true;
- }
- if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
- // Defer caching UsesAGPRs, function might not yet been regbank selected.
- return true;
- }
- }
-
- for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
- if (MRI.isPhysRegUsed(Reg)) {
- UsesAGPRs = true;
- return true;
- }
- }
-
- UsesAGPRs = false;
- return false;
-}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 2e2716f1ce888..740f752bc93b7 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -494,8 +494,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// scheduler stage.
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
- mutable std::optional<bool> UsesAGPRs;
-
MCPhysReg getNextUserSGPR() const;
MCPhysReg getNextSystemSGPR() const;
@@ -1126,9 +1124,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// has a call which may use it.
bool mayUseAGPRs(const Function &F) const;
- // \returns true if a function needs or may need AGPRs.
- bool usesAGPRs(const MachineFunction &MF) const;
-
/// \returns Default/requested number of work groups for this function.
SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 71c720ed09b5f..924aa45559366 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -585,7 +585,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
// register file accordingly.
if (ST.hasGFX90AInsts()) {
- if (MFI->usesAGPRs(MF)) {
+ if (MFI->mayNeedAGPRs()) {
MaxNumVGPRs /= 2;
MaxNumAGPRs = MaxNumVGPRs;
} else {
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
new file mode 100644
index 0000000000000..5f2e9af378f08
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CRASH %s
+
+; CRASH: error: <unknown>:0:0: no registers from class available to allocate in function 'no_free_vgprs_at_agpr_to_agpr_copy'
+; CRASH: Cannot access invalid iterator
+
+define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
+ %asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1", "=${v[0:31]},=${a[0:15]}"()
+ %vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0
+ %agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1
+ %mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0)
+ %agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0)
+ %agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma)
+ call void asm sideeffect "; use $0 $1", "{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0)
+ ret void
+}
+
+declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
+declare noundef i32 @llvm.amdgcn.workitem.id.x() #2
+
+attributes #0 = { "amdgpu-no-agpr" "amdgpu-waves-per-eu"="6,6" }
+attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 4ce46bbaf45ac..d1b01eeee11a4 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -240,7 +240,7 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
}
; Check that we do make use of v32 if there are no AGPRs present in the function
-define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #5 {
; GFX908-LABEL: no_agpr_no_reserve:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
@@ -1144,5 +1144,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
attributes #0 = { "amdgpu-waves-per-eu"="6,6" }
attributes #1 = { convergent nounwind readnone willreturn }
attributes #2 = { nounwind readnone willreturn }
-attributes #3 = { "amdgpu-waves-per-eu"="7,7" }
+attributes #3 = { "amdgpu-waves-per-eu"="7,7" "amdgpu-no-agpr" }
attributes #4 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-flat-work-group-size"="1024,1024" }
+attributes #5 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-no-agpr" }
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 322686b0144a0..f6f78f134fc1f 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -94,9 +94,20 @@ bb3:
ret void
}
-; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_noagpr:
+; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
+; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
+define void @test_mfma_f32_32x32x1f32_nonentry_noagpr(ptr addrspace(1) %arg) #0 {
+bb:
+ %in.1 = load <32 x float>, ptr addrspace(1) %arg
+ %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
+ store <32 x float> %mai.1, ptr addrspace(1) %arg
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_with_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) #0 {
+define void @test_mfma_f32_32x32x1f32_nonentry_with_agpr(ptr addrspace(1) %arg) #3 {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -109,3 +120,4 @@ declare void @foo()
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" }
+attributes #3 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
index ed57caadea5c5..583d1f9e4a9fd 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
@@ -6,7 +6,7 @@
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+ attributes #0 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-no-agpr" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
index 040799435db4a..03978e68e81b4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
@@ -518,6 +518,522 @@ define internal void @use256vgprs() {
ret void
}
+define internal void @use256vgprs_no_agpr() "amdgpu-no-agpr" {
+ %v0 = call i32 asm sideeffect "; def $0", "=v"()
+ %v1 = call i32 asm sideeffect "; def $0", "=v"()
+ %v2 = call i32 asm sideeffect "; def $0", "=v"()
+ %v3 = call i32 asm sideeffect "; def $0", "=v"()
+ %v4 = call i32 asm sideeffect "; def $0", "=v"()
+ %v5 = call i32 asm sideeffect "; def $0", "=v"()
+ %v6 = call i32 asm sideeffect "; def $0", "=v"()
+ %v7 = call i32 asm sideeffect "; def $0", "=v"()
+ %v8 = call i32 asm sideeffect "; def $0", "=v"()
+ %v9 = call i32 asm sideeffect "; def $0", "=v"()
+ %v10 = call i32 asm sideeffect "; def $0", "=v"()
+ %v11 = call i32 asm sideeffect "; def $0", "=v"()
+ %v12 = call i32 asm sideeffect "; def $0", "=v"()
+ %v13 = call i32 asm sideeffect "; def $0", "=v"()
+ %v14 = call i32 asm sideeffect "; def $0", "=v"()
+ %v15 = call i32 asm sideeffect "; def $0", "=v"()
+ %v16 = call i32 asm sideeffect "; def $0", "=v"()
+ %v17 = call i32 asm sideeffect "; def $0", "=v"()
+ %v18 = call i32 asm sideeffect "; def $0", "=v"()
+ %v19 = call i32 asm sideeffect "; def $0", "=v"()
+ %v20 = call i32 asm sideeffect "; def $0", "=v"()
+ %v21 = call i32 asm sideeffect "; def $0", "=v"()
+ %v22 = call i32 asm sideeffect "; def $0", "=v"()
+ %v23 = call i32 asm sideeffect "; def $0", "=v"()
+ %v24 = call i32 asm sideeffect "; def $0", "=v"()
+ %v25 = call i32 asm sideeffect "; def $0", "=v"()
+ %v26 = call i32 asm sideeffect "; def $0", "=v"()
+ %v27 = call i32 asm sideeffect "; def $0", "=v"()
+ %v28 = call i32 asm sideeffect "; def $0", "=v"()
+ %v29 = call i32 asm sideeffect "; def $0", "=v"()
+ %v30 = call i32 asm sideeffect "; def $0", "=v"()
+ %v31 = call i32 asm sideeffect "; def $0", "=v"()
+ %v32 = call i32 asm sideeffect "; def $0", "=v"()
+ %v33 = call i32 asm sideeffect "; def $0", "=v"()
+ %v34 = call i32 asm sideeffect "; def $0", "=v"()
+ %v35 = call i32 asm sideeffect "; def $0", "=v"()
+ %v36 = call i32 asm sideeffect "; def $0", "=v"()
+ %v37 = call i32 asm sideeffect "; def $0", "=v"()
+ %v38 = call i32 asm sideeffect "; def $0", "=v"()
+ %v39 = call i32 asm sideeffect "; def $0", "=v"()
+ %v40 = call i32 asm sideeffect "; def $0", "=v"()
+ %v41 = call i32 asm sideeffect "; def $0", "=v"()
+ %v42 = call i32 asm sideeffect "; def $0", "=v"()
+ %v43 = call i32 asm sideeffect "; def $0", "=v"()
+ %v44 = call i32 asm sideeffect "; def $0", "=v"()
+ %v45 = call i32 asm sideeffect "; def $0", "=v"()
+ %v46 = call i32 asm sideeffect "; def $0", "=v"()
+ %v47 = call i32 asm sideeffect "; def $0", "=v"()
+ %v48 = call i32 asm sideeffect "; def $0", "=v"()
+ %v49 = call i32 asm sideeffect "; def $0", "=v"()
+ %v50 = call i32 asm sideeffect "; def $0", "=v"()
+ %v51 = call i32 asm sideeffect "; def $0", "=v"()
+ %v52 = call i32 asm sideeffect "; def $0", "=v"()
+ %v53 = call i32 asm sideeffect "; def $0", "=v"()
+ %v54 = call i32 asm sideeffect "; def $0", "=v"()
+ %v55 = call i32 asm sideeffect "; def $0", "=v"()
+ %v56 = call i32 asm sideeffect "; def $0", "=v"()
+ %v57 = call i32 asm sideeffect "; def $0", "=v"()
+ %v58 = call i32 asm sideeffect "; def $0", "=v"()
+ %v59 = call i32 asm sideeffect "; def $0", "=v"()
+ %v60 = call i32 asm sideeffect "; def $0", "=v"()
+ %v61 = call i32 asm sideeffect "; def $0", "=v"()
+ %v62 = call i32 asm sideeffect "; def $0", "=v"()
+ %v63 = call i32 asm sideeffect "; def $0", "=v"()
+ %v64 = call i32 asm sideeffect "; def $0", "=v"()
+ %v65 = call i32 asm sideeffect "; def $0", "=v"()
+ %v66 = call i32 asm sideeffect "; def $0", "=v"()
+ %v67 = call i32 asm sideeffect "; def $0", "=v"()
+ %v68 = call i32 asm sideeffect "; def $0", "=v"()
+ %v69 = call i32 asm sideeffect "; def $0", "=v"()
+ %v70 = call i32 asm sideeffect "; def $0", "=v"()
+ %v71 = call i32 asm sideeffect "; def $0", "=v"()
+ %v72 = call i32 asm sideeffect "; def $0", "=v"()
+ %v73 = call i32 asm sideeffect "; def $0", "=v"()
+ %v74 = call i32 asm sideeffect "; def $0", "=v"()
+ %v75 = call i32 asm sideeffect "; def $0", "=v"()
+ %v76 = call i32 asm sideeffect "; def $0", "=v"()
+ %v77 = call i32 asm sideeffect "; def $0", "=v"()
+ %v78 = call i32 asm sideeffect "; def $0", "=v"()
+ %v79 = call i32 asm sideeffect "; def $0", "=v"()
+ %v80 = call i32 asm sideeffect "; def $0", "=v"()
+ %v81 = call i32 asm sideeffect "; def $0", "=v"()
+ %v82 = call i32 asm sideeffect "; def $0", "=v"()
+ %v83 = call i32 asm sideeffect "; def $0", "=v"()
+ %v84 = call i32 asm sideeffect "; def $0", "=v"()
+ %v85 = call i32 asm sideeffect "; def $0", "=v"()
+ %v86 = call i32 asm sideeffect "; def $0", "=v"()
+ %v87 = call i32 asm sideeffect "; def $0", "=v"()
+ %v88 = call i32 asm sideeffect "; def $0", "=v"()
+ %v89 = call i32 asm sideeffect "; def $0", "=v"()
+ %v90 = call i32 asm sideeffect "; def $0", "=v"()
+ %v91 = call i32 asm sideeffect "; def $0", "=v"()
+ %v92 = call i32 asm sideeffect "; def $0", "=v"()
+ %v93 = call i32 asm sideeffect "; def $0", "=v"()
+ %v94 = call i32 asm sideeffect "; def $0", "=v"()
+ %v95 = call i32 asm sideeffect "; def $0", "=v"()
+ %v96 = call i32 asm sideeffect "; def $0", "=v"()
+ %v97 = call i32 asm sideeffect "; def $0", "=v"()
+ %v98 = call i32 asm sideeffect "; def $0", "=v"()
+ %v99 = call i32 asm sideeffect "; def $0", "=v"()
+ %v100 = call i32 asm sideeffect "; def $0", "=v"()
+ %v101 = call i32 asm sideeffect "; def $0", "=v"()
+ %v102 = call i32 asm sideeffect "; def $0", "=v"()
+ %v103 = call i32 asm sideeffect "; def $0", "=v"()
+ %v104 = call i32 asm sideeffect "; def $0", "=v"()
+ %v105 = call i32 asm sideeffect "; def $0", "=v"()
+ %v106 = call i32 asm sideeffect "; def $0", "=v"()
+ %v107 = call i32 asm sideeffect "; def $0", "=v"()
+ %v108 = call i32 asm sideeffect "; def $0", "=v"()
+ %v109 = call i32 asm sideeffect "; def $0", "=v"()
+ %v110 = call i32 asm sideeffect "; def $0", "=v"()
+ %v111 = call i32 asm sideeffect "; def $0", "=v"()
+ %v112 = call i32 asm sideeffect "; def $0", "=v"()
+ %v113 = call i32 asm sideeffect "; def $0", "=v"()
+ %v114 = call i32 asm sideeffect "; def $0", "=v"()
+ %v115 = call i32 asm sideeffect "; def $0", "=v"()
+ %v116 = call i32 asm sideeffect "; def $0", "=v"()
+ %v117 = call i32 asm sideeffect "; def $0", "=v"()
+ %v118 = call i32 asm sideeffect "; def $0", "=v"()
+ %v119 = call i32 asm sideeffect "; def $0", "=v"()
+ %v120 = call i32 asm sideeffect "; def $0", "=v"()
+ %v121 = call i32 asm sideeffect "; def $0", "=v"()
+ %v122 = call i32 asm sideeffect "; def $0", "=v"()
+ %v123 = call i32 asm sideeffect "; def $0", "=v"()
+ %v124 = call i32 asm sideeffect "; def $0", "=v"()
+ %v125 = call i32 asm sideeffect "; def $0", "=v"()
+ %v126 = call i32 asm sideeffect "; def $0", "=v"()
+ %v127 = call i32 asm sideeffect "; def $0", "=v"()
+ %v128 = call i32 asm sideeffect "; def $0", "=v"()
+ %v129 = call i32 asm sideeffect "; def $0", "=v"()
+ %v130 = call i32 asm sideeffect "; def $0", "=v"()
+ %v131 = call i32 asm sideeffect "; def $0", "=v"()
+ %v132 = call i32 asm sideeffect "; def $0", "=v"()
+ %v133 = call i32 asm sideeffect "; def $0", "=v"()
+ %v134 = call i32 asm sideeffect "; def $0", "=v"()
+ %v135 = call i32 asm sideeffect "; def $0", "=v"()
+ %v136 = call i32 asm sideeffect "; def $0", "=v"()
+ %v137 = call i32 asm sideeffect "; def $0", "=v"()
+ %v138 = call i32 asm sideeffect "; def $0", "=v"()
+ %v139 = call i32 asm sideeffect "; def $0", "=v"()
+ %v140 = call i32 asm sideeffect "; def $0", "=v"()
+ %v141 = call i32 asm sideeffect "; def $0", "=v"()
+ %v142 = call i32 asm sideeffect "; def $0", "=v"()
+ %v143 = call i32 asm sideeffect "; def $0", "=v"()
+ %v144 = call i32 asm sideeffect "; def $0", "=v"()
+ %v145 = call i32 asm sideeffect "; def $0", "=v"()
+ %v146 = call i32 asm sideeffect "; def $0", "=v"()
+ %v147 = call i32 asm sideeffect "; def $0", "=v"()
+ %v148 = call i32 asm sideeffect "; def $0", "=v"()
+ %v149 = call i32 asm sideeffect "; def $0", "=v"()
+ %v150 = call i32 asm sideeffect "; def $0", "=v"()
+ %v151 = call i32 asm sideeffect "; def $0", "=v"()
+ %v152 = call i32 asm sideeffect "; def $0", "=v"()
+ %v153 = call i32 asm sideeffect "; def $0", "=v"()
+ %v154 = call i32 asm sideeffect "; def $0", "=v"()
+ %v155 = call i32 asm sideeffect "; def $0", "=v"()
+ %v156 = call i32 asm sideeffect "; def $0", "=v"()
+ %v157 = call i32 asm sideeffect "; def $0", "=v"()
+ %v158 = call i32 asm sideeffect "; def $0", "=v"()
+ %v159 = call i32 asm sideeffect "; def $0", "=v"()
+ %v160 = call i32 asm sideeffect "; def $0", "=v"()
+ %v161 = call i32 asm sideeffect "; def $0", "=v"()
+ %v162 = call i32 asm sideeffect "; def $0", "=v"()
+ %v163 = call i32 asm sideeffect "; def $0", "=v"()
+ %v164 = call i32 asm sideeffect "; def $0", "=v"()
+ %v165 = call i32 asm sideeffect "; def $0", "=v"()
+ %v166 = call i32 asm sideeffect "; def $0", "=v"()
+ %v167 = call i32 asm sideeffect "; def $0", "=v"()
+ %v168 = call i32 asm sideeffect "; def $0", "=v"()
+ %v169 = call i32 asm sideeffect "; def $0", "=v"()
+ %v170 = call i32 asm sideeffect "; def $0", "=v"()
+ %v171 = call i32 asm sideeffect "; def $0", "=v"()
+ %v172 = call i32 asm sideeffect "; def $0", "=v"()
+ %v173 = call i32 asm sideeffect "; def $0", "=v"()
+ %v174 = call i32 asm sideeffect "; def $0", "=v"()
+ %v175 = call i32 asm sideeffect "; def $0", "=v"()
+ %v176 = call i32 asm sideeffect "; def $0", "=v"()
+ %v177 = call i32 asm sideeffect "; def $0", "=v"()
+ %v178 = call i32 asm sideeffect "; def $0", "=v"()
+ %v179 = call i32 asm sideeffect "; def $0", "=v"()
+ %v180 = call i32 asm sideeffect "; def $0", "=v"()
+ %v181 = call i32 asm sideeffect "; def $0", "=v"()
+ %v182 = call i32 asm sideeffect "; def $0", "=v"()
+ %v183 = call i32 asm sideeffect "; def $0", "=v"()
+ %v184 = call i32 asm sideeffect "; def $0", "=v"()
+ %v185 = call i32 asm sideeffect "; def $0", "=v"()
+ %v186 = call i32 asm sideeffect "; def $0", "=v"()
+ %v187 = call i32 asm sideeffect "; def $0", "=v"()
+ %v188 = call i32 asm sideeffect "; def $0", "=v"()
+ %v189 = call i32 asm sideeffect "; def $0", "=v"()
+ %v190 = call i32 asm sideeffect "; def $0", "=v"()
+ %v191 = call i32 asm sideeffect "; def $0", "=v"()
+ %v192 = call i32 asm sideeffect "; def $0", "=v"()
+ %v193 = call i32 asm sideeffect "; def $0", "=v"()
+ %v194 = call i32 asm sideeffect "; def $0", "=v"()
+ %v195 = call i32 asm sideeffect "; def $0", "=v"()
+ %v196 = call i32 asm sideeffect "; def $0", "=v"()
+ %v197 = call i32 asm sideeffect "; def $0", "=v"()
+ %v198 = call i32 asm sideeffect "; def $0", "=v"()
+ %v199 = call i32 asm sideeffect "; def $0", "=v"()
+ %v200 = call i32 asm sideeffect "; def $0", "=v"()
+ %v201 = call i32 asm sideeffect "; def $0", "=v"()
+ %v202 = call i32 asm sideeffect "; def $0", "=v"()
+ %v203 = call i32 asm sideeffect "; def $0", "=v"()
+ %v204 = call i32 asm sideeffect "; def $0", "=v"()
+ %v205 = call i32 asm sideeffect "; def $0", "=v"()
+ %v206 = call i32 asm sideeffect "; def $0", "=v"()
+ %v207 = call i32 asm sideeffect "; def $0", "=v"()
+ %v208 = call i32 asm sideeffect "; def $0", "=v"()
+ %v209 = call i32 asm sideeffect "; def $0", "=v"()
+ %v210 = call i32 asm sideeffect "; def $0", "=v"()
+ %v211 = call i32 asm sideeffect "; def $0", "=v"()
+ %v212 = call i32 asm sideeffect "; def $0", "=v"()
+ %v213 = call i32 asm sideeffect "; def $0", "=v"()
+ %v214 = call i32 asm sideeffect "; def $0", "=v"()
+ %v215 = call i32 asm sideeffect "; def $0", "=v"()
+ %v216 = call i32 asm sideeffect "; def $0", "=v"()
+ %v217 = call i32 asm sideeffect "; def $0", "=v"()
+ %v218 = call i32 asm sideeffect "; def $0", "=v"()
+ %v219 = call i32 asm sideeffect "; def $0", "=v"()
+ %v220 = call i32 asm sideeffect "; def $0", "=v"()
+ %v221 = call i32 asm sideeffect "; def $0", "=v"()
+ %v222 = call i32 asm sideeffect "; def $0", "=v"()
+ %v223 = call i32 asm sideeffect "; def $0", "=v"()
+ %v224 = call i32 asm sideeffect "; def $0", "=v"()
+ %v225 = call i32 asm sideeffect "; def $0", "=v"()
+ %v226 = call i32 asm sideeffect "; def $0", "=v"()
+ %v227 = call i32 asm sideeffect "; def $0", "=v"()
+ %v228 = call i32 asm sideeffect "; def $0", "=v"()
+ %v229 = call i32 asm sideeffect "; def $0", "=v"()
+ %v230 = call i32 asm sideeffect "; def $0", "=v"()
+ %v231 = call i32 asm sideeffect "; def $0", "=v"()
+ %v232 = call i32 asm sideeffect "; def $0", "=v"()
+ %v233 = call i32 asm sideeffect "; def $0", "=v"()
+ %v234 = call i32 asm sideeffect "; def $0", "=v"()
+ %v235 = call i32 asm sideeffect "; def $0", "=v"()
+ %v236 = call i32 asm sideeffect "; def $0", "=v"()
+ %v237 = call i32 asm sideeffect "; def $0", "=v"()
+ %v238 = call i32 asm sideeffect "; def $0", "=v"()
+ %v239 = call i32 asm sideeffect "; def $0", "=v"()
+ %v240 = call i32 asm sideeffect "; def $0", "=v"()
+ %v241 = call i32 asm sideeffect "; def $0", "=v"()
+ %v242 = call i32 asm sideeffect "; def $0", "=v"()
+ %v243 = call i32 asm sideeffect "; def $0", "=v"()
+ %v244 = call i32 asm sideeffect "; def $0", "=v"()
+ %v245 = call i32 asm sideeffect "; def $0", "=v"()
+ %v246 = call i32 asm sideeffect "; def $0", "=v"()
+ %v247 = call i32 asm sideeffect "; def $0", "=v"()
+ %v248 = call i32 asm sideeffect "; def $0", "=v"()
+ %v249 = call i32 asm sideeffect "; def $0", "=v"()
+ %v250 = call i32 asm sideeffect "; def $0", "=v"()
+ %v251 = call i32 asm sideeffect "; def $0", "=v"()
+ %v252 = call i32 asm sideeffect "; def $0", "=v"()
+ %v253 = call i32 asm sideeffect "; def $0", "=v"()
+ %v254 = call i32 asm sideeffect "; def $0", "=v"()
+ %v255 = call i32 asm sideeffect "; def $0", "=v"()
+ call void asm sideeffect "; use $0", "v"(i32 %v0)
+ call void asm sideeffect "; use $0", "v"(i32 %v1)
+ call void asm sideeffect "; use $0", "v"(i32 %v2)
+ call void asm sideeffect "; use $0", "v"(i32 %v3)
+ call void asm sideeffect "; use $0", "v"(i32 %v4)
+ call void asm sideeffect "; use $0", "v"(i32 %v5)
+ call void asm sideeffect "; use $0", "v"(i32 %v6)
+ call void asm sideeffect "; use $0", "v"(i32 %v7)
+ call void asm sideeffect "; use $0", "v"(i32 %v8)
+ call void asm sideeffect "; use $0", "v"(i32 %v9)
+ call void asm sideeffect "; use $0", "v"(i32 %v10)
+ call void asm sideeffect "; use $0", "v"(i32 %v11)
+ call void asm sideeffect "; use $0", "v"(i32 %v12)
+ call void asm sideeffect "; use $0", "v"(i32 %v13)
+ call void asm sideeffect "; use $0", "v"(i32 %v14)
+ call void asm sideeffect "; use $0", "v"(i32 %v15)
+ call void asm sideeffect "; use $0", "v"(i32 %v16)
+ call void asm sideeffect "; use $0", "v"(i32 %v17)
+ call void asm sideeffect "; use $0", "v"(i32 %v18)
+ call void asm sideeffect "; use $0", "v"(i32 %v19)
+ call void asm sideeffect "; use $0", "v"(i32 %v20)
+ call void asm sideeffect "; use $0", "v"(i32 %v21)
+ call void asm sideeffect "; use $0", "v"(i32 %v22)
+ call void asm sideeffect "; use $0", "v"(i32 %v23)
+ call void asm sideeffect "; use $0", "v"(i32 %v24)
+ call void asm sideeffect "; use $0", "v"(i32 %v25)
+ call void asm sideeffect "; use $0", "v"(i32 %v26)
+ call void asm sideeffect "; use $0", "v"(i32 %v27)
+ call void asm sideeffect "; use $0", "v"(i32 %v28)
+ call void asm sideeffect "; use $0", "v"(i32 %v29)
+ call void asm sideeffect "; use $0", "v"(i32 %v30)
+ call void asm sideeffect "; use $0", "v"(i32 %v31)
+ call void asm sideeffect "; use $0", "v"(i32 %v32)
+ call void asm sideeffect "; use $0", "v"(i32 %v33)
+ call void asm sideeffect "; use $0", "v"(i32 %v34)
+ call void asm sideeffect "; use $0", "v"(i32 %v35)
+ call void asm sideeffect "; use $0", "v"(i32 %v36)
+ call void asm sideeffect "; use $0", "v"(i32 %v37)
+ call void asm sideeffect "; use $0", "v"(i32 %v38)
+ call void asm sideeffect "; use $0", "v"(i32 %v39)
+ call void asm sideeffect "; use $0", "v"(i32 %v40)
+ call void asm sideeffect "; use $0", "v"(i32 %v41)
+ call void asm sideeffect "; use $0", "v"(i32 %v42)
+ call void asm sideeffect "; use $0", "v"(i32 %v43)
+ call void asm sideeffect "; use $0", "v"(i32 %v44)
+ call void asm sideeffect "; use $0", "v"(i32 %v45)
+ call void asm sideeffect "; use $0", "v"(i32 %v46)
+ call void asm sideeffect "; use $0", "v"(i32 %v47)
+ call void asm sideeffect "; use $0", "v"(i32 %v48)
+ call void asm sideeffect "; use $0", "v"(i32 %v49)
+ call void asm sideeffect "; use $0", "v"(i32 %v50)
+ call void asm sideeffect "; use $0", "v"(i32 %v51)
+ call void asm sideeffect "; use $0", "v"(i32 %v52)
+ call void asm sideeffect "; use $0", "v"(i32 %v53)
+ call void asm sideeffect "; use $0", "v"(i32 %v54)
+ call void asm sideeffect "; use $0", "v"(i32 %v55)
+ call void asm sideeffect "; use $0", "v"(i32 %v56)
+ call void asm sideeffect "; use $0", "v"(i32 %v57)
+ call void asm sideeffect "; use $0", "v"(i32 %v58)
+ call void asm sideeffect "; use $0", "v"(i32 %v59)
+ call void asm sideeffect "; use $0", "v"(i32 %v60)
+ call void asm sideeffect "; use $0", "v"(i32 %v61)
+ call void asm sideeffect "; use $0", "v"(i32 %v62)
+ call void asm sideeffect "; use $0", "v"(i32 %v63)
+ call void asm sideeffect "; use $0", "v"(i32 %v64)
+ call void asm sideeffect "; use $0", "v"(i32 %v65)
+ call void asm sideeffect "; use $0", "v"(i32 %v66)
+ call void asm sideeffect "; use $0", "v"(i32 %v67)
+ call void asm sideeffect "; use $0", "v"(i32 %v68)
+ call void asm sideeffect "; use $0", "v"(i32 %v69)
+ call void asm sideeffect "; use $0", "v"(i32 %v70)
+ call void asm sideeffect "; use $0", "v"(i32 %v71)
+ call void asm sideeffect "; use $0", "v"(i32 %v72)
+ call void asm sideeffect "; use $0", "v"(i32 %v73)
+ call void asm sideeffect "; use $0", "v"(i32 %v74)
+ call void asm sideeffect "; use $0", "v"(i32 %v75)
+ call void asm sideeffect "; use $0", "v"(i32 %v76)
+ call void asm sideeffect "; use $0", "v"(i32 %v77)
+ call void asm sideeffect "; use $0", "v"(i32 %v78)
+ call void asm sideeffect "; use $0", "v"(i32 %v79)
+ call void asm sideeffect "; use $0", "v"(i32 %v80)
+ call void asm sideeffect "; use $0", "v"(i32 %v81)
+ call void asm sideeffect "; use $0", "v"(i32 %v82)
+ call void asm sideeffect "; use $0", "v"(i32 %v83)
+ call void asm sideeffect "; use $0", "v"(i32 %v84)
+ call void asm sideeffect "; use $0", "v"(i32 %v85)
+ call void asm sideeffect "; use $0", "v"(i32 %v86)
+ call void asm sideeffect "; use $0", "v"(i32 %v87)
+ call void asm sideeffect "; use $0", "v"(i32 %v88)
+ call void asm sideeffect "; use $0", "v"(i32 %v89)
+ call void asm sideeffect "; use $0", "v"(i32 %v90)
+ call void asm sideeffect "; use $0", "v"(i32 %v91)
+ call void asm sideeffect "; use $0", "v"(i32 %v92)
+ call void asm sideeffect "; use $0", "v"(i32 %v93)
+ call void asm sideeffect "; use $0", "v"(i32 %v94)
+ call void asm sideeffect "; use $0", "v"(i32 %v95)
+ call void asm sideeffect "; use $0", "v"(i32 %v96)
+ call void asm sideeffect "; use $0", "v"(i32 %v97)
+ call void asm sideeffect "; use $0", "v"(i32 %v98)
+ call void asm sideeffect "; use $0", "v"(i32 %v99)
+ call void asm sideeffect "; use $0", "v"(i32 %v100)
+ call void asm sideeffect "; use $0", "v"(i32 %v101)
+ call void asm sideeffect "; use $0", "v"(i32 %v102)
+ call void asm sideeffect "; use $0", "v"(i32 %v103)
+ call void asm sideeffect "; use $0", "v"(i32 %v104)
+ call void asm sideeffect "; use $0", "v"(i32 %v105)
+ call void asm sideeffect "; use $0", "v"(i32 %v106)
+ call void asm sideeffect "; use $0", "v"(i32 %v107)
+ call void asm sideeffect "; use $0", "v"(i32 %v108)
+ call void asm sideeffect "; use $0", "v"(i32 %v109)
+ call void asm sideeffect "; use $0", "v"(i32 %v110)
+ call void asm sideeffect "; use $0", "v"(i32 %v111)
+ call void asm sideeffect "; use $0", "v"(i32 %v112)
+ call void asm sideeffect "; use $0", "v"(i32 %v113)
+ call void asm sideeffect "; use $0", "v"(i32 %v114)
+ call void asm sideeffect "; use $0", "v"(i32 %v115)
+ call void asm sideeffect "; use $0", "v"(i32 %v116)
+ call void asm sideeffect "; use $0", "v"(i32 %v117)
+ call void asm sideeffect "; use $0", "v"(i32 %v118)
+ call void asm sideeffect "; use $0", "v"(i32 %v119)
+ call void asm sideeffect "; use $0", "v"(i32 %v120)
+ call void asm sideeffect "; use $0", "v"(i32 %v121)
+ call void asm sideeffect "; use $0", "v"(i32 %v122)
+ call void asm sideeffect "; use $0", "v"(i32 %v123)
+ call void asm sideeffect "; use $0", "v"(i32 %v124)
+ call void asm sideeffect "; use $0", "v"(i32 %v125)
+ call void asm sideeffect "; use $0", "v"(i32 %v126)
+ call void asm sideeffect "; use $0", "v"(i32 %v127)
+ call void asm sideeffect "; use $0", "v"(i32 %v128)
+ call void asm sideeffect "; use $0", "v"(i32 %v129)
+ call void asm sideeffect "; use $0", "v"(i32 %v130)
+ call void asm sideeffect "; use $0", "v"(i32 %v131)
+ call void asm sideeffect "; use $0", "v"(i32 %v132)
+ call void asm sideeffect "; use $0", "v"(i32 %v133)
+ call void asm sideeffect "; use $0", "v"(i32 %v134)
+ call void asm sideeffect "; use $0", "v"(i32 %v135)
+ call void asm sideeffect "; use $0", "v"(i32 %v136)
+ call void asm sideeffect "; use $0", "v"(i32 %v137)
+ call void asm sideeffect "; use $0", "v"(i32 %v138)
+ call void asm sideeffect "; use $0", "v"(i32 %v139)
+ call void asm sideeffect "; use $0", "v"(i32 %v140)
+ call void asm sideeffect "; use $0", "v"(i32 %v141)
+ call void asm sideeffect "; use $0", "v"(i32 %v142)
+ call void asm sideeffect "; use $0", "v"(i32 %v143)
+ call void asm sideeffect "; use $0", "v"(i32 %v144)
+ call void asm sideeffect "; use $0", "v"(i32 %v145)
+ call void asm sideeffect "; use $0", "v"(i32 %v146)
+ call void asm sideeffect "; use $0", "v"(i32 %v147)
+ call void asm sideeffect "; use $0", "v"(i32 %v148)
+ call void asm sideeffect "; use $0", "v"(i32 %v149)
+ call void asm sideeffect "; use $0", "v"(i32 %v150)
+ call void asm sideeffect "; use $0", "v"(i32 %v151)
+ call void asm sideeffect "; use $0", "v"(i32 %v152)
+ call void asm sideeffect "; use $0", "v"(i32 %v153)
+ call void asm sideeffect "; use $0", "v"(i32 %v154)
+ call void asm sideeffect "; use $0", "v"(i32 %v155)
+ call void asm sideeffect "; use $0", "v"(i32 %v156)
+ call void asm sideeffect "; use $0", "v"(i32 %v157)
+ call void asm sideeffect "; use $0", "v"(i32 %v158)
+ call void asm sideeffect "; use $0", "v"(i32 %v159)
+ call void asm sideeffect "; use $0", "v"(i32 %v160)
+ call void asm sideeffect "; use $0", "v"(i32 %v161)
+ call void asm sideeffect "; use $0", "v"(i32 %v162)
+ call void asm sideeffect "; use $0", "v"(i32 %v163)
+ call void asm sideeffect "; use $0", "v"(i32 %v164)
+ call void asm sideeffect "; use $0", "v"(i32 %v165)
+ call void asm sideeffect "; use $0", "v"(i32 %v166)
+ call void asm sideeffect "; use $0", "v"(i32 %v167)
+ call void asm sideeffect "; use $0", "v"(i32 %v168)
+ call void asm sideeffect "; use $0", "v"(i32 %v169)
+ call void asm sideeffect "; use $0", "v"(i32 %v170)
+ call void asm sideeffect "; use $0", "v"(i32 %v171)
+ call void asm sideeffect "; use $0", "v"(i32 %v172)
+ call void asm sideeffect "; use $0", "v"(i32 %v173)
+ call void asm sideeffect "; use $0", "v"(i32 %v174)
+ call void asm sideeffect "; use $0", "v"(i32 %v175)
+ call void asm sideeffect "; use $0", "v"(i32 %v176)
+ call void asm sideeffect "; use $0", "v"(i32 %v177)
+ call void asm sideeffect "; use $0", "v"(i32 %v178)
+ call void asm sideeffect "; use $0", "v"(i32 %v179)
+ call void asm sideeffect "; use $0", "v"(i32 %v180)
+ call void asm sideeffect "; use $0", "v"(i32 %v181)
+ call void asm sideeffect "; use $0", "v"(i32 %v182)
+ call void asm sideeffect "; use $0", "v"(i32 %v183)
+ call void asm sideeffect "; use $0", "v"(i32 %v184)
+ call void asm sideeffect "; use $0", "v"(i32 %v185)
+ call void asm sideeffect "; use $0", "v"(i32 %v186)
+ call void asm sideeffect "; use $0", "v"(i32 %v187)
+ call void asm sideeffect "; use $0", "v"(i32 %v188)
+ call void asm sideeffect "; use $0", "v"(i32 %v189)
+ call void asm sideeffect "; use $0", "v"(i32 %v190)
+ call void asm sideeffect "; use $0", "v"(i32 %v191)
+ call void asm sideeffect "; use $0", "v"(i32 %v192)
+ call void asm sideeffect "; use $0", "v"(i32 %v193)
+ call void asm sideeffect "; use $0", "v"(i32 %v194)
+ call void asm sideeffect "; use $0", "v"(i32 %v195)
+ call void asm sideeffect "; use $0", "v"(i32 %v196)
+ call void asm sideeffect "; use $0", "v"(i32 %v197)
+ call void asm sideeffect "; use $0", "v"(i32 %v198)
+ call void asm sideeffect "; use $0", "v"(i32 %v199)
+ call void asm sideeffect "; use $0", "v"(i32 %v200)
+ call void asm sideeffect "; use $0", "v"(i32 %v201)
+ call void asm sideeffect "; use $0", "v"(i32 %v202)
+ call void asm sideeffect "; use $0", "v"(i32 %v203)
+ call void asm sideeffect "; use $0", "v"(i32 %v204)
+ call void asm sideeffect "; use $0", "v"(i32 %v205)
+ call void asm sideeffect "; use $0", "v"(i32 %v206)
+ call void asm sideeffect "; use $0", "v"(i32 %v207)
+ call void asm sideeffect "; use $0", "v"(i32 %v208)
+ call void asm sideeffect "; use $0", "v"(i32 %v209)
+ call void asm sideeffect "; use $0", "v"(i32 %v210)
+ call void asm sideeffect "; use $0", "v"(i32 %v211)
+ call void asm sideeffect "; use $0", "v"(i32 %v212)
+ call void asm sideeffect "; use $0", "v"(i32 %v213)
+ call void asm sideeffect "; use $0", "v"(i32 %v214)
+ call void asm sideeffect "; use $0", "v"(i32 %v215)
+ call void asm sideeffect "; use $0", "v"(i32 %v216)
+ call void asm sideeffect "; use $0", "v"(i32 %v217)
+ call void asm sideeffect "; use $0", "v"(i32 %v218)
+ call void asm sideeffect "; use $0", "v"(i32 %v219)
+ call void asm sideeffect "; use $0", "v"(i32 %v220)
+ call void asm sideeffect "; use $0", "v"(i32 %v221)
+ call void asm sideeffect "; use $0", "v"(i32 %v222)
+ call void asm sideeffect "; use $0", "v"(i32 %v223)
+ call void asm sideeffect "; use $0", "v"(i32 %v224)
+ call void asm sideeffect "; use $0", "v"(i32 %v225)
+ call void asm sideeffect "; use $0", "v"(i32 %v226)
+ call void asm sideeffect "; use $0", "v"(i32 %v227)
+ call void asm sideeffect "; use $0", "v"(i32 %v228)
+ call void asm sideeffect "; use $0", "v"(i32 %v229)
+ call void asm sideeffect "; use $0", "v"(i32 %v230)
+ call void asm sideeffect "; use $0", "v"(i32 %v231)
+ call void asm sideeffect "; use $0", "v"(i32 %v232)
+ call void asm sideeffect "; use $0", "v"(i32 %v233)
+ call void asm sideeffect "; use $0", "v"(i32 %v234)
+ call void asm sideeffect "; use $0", "v"(i32 %v235)
+ call void asm sideeffect "; use $0", "v"(i32 %v236)
+ call void asm sideeffect "; use $0", "v"(i32 %v237)
+ call void asm sideeffect "; use $0", "v"(i32 %v238)
+ call void asm sideeffect "; use $0", "v"(i32 %v239)
+ call void asm sideeffect "; use $0", "v"(i32 %v240)
+ call void asm sideeffect "; use $0", "v"(i32 %v241)
+ call void asm sideeffect "; use $0", "v"(i32 %v242)
+ call void asm sideeffect "; use $0", "v"(i32 %v243)
+ call void asm sideeffect "; use $0", "v"(i32 %v244)
+ call void asm sideeffect "; use $0", "v"(i32 %v245)
+ call void asm sideeffect "; use $0", "v"(i32 %v246)
+ call void asm sideeffect "; use $0", "v"(i32 %v247)
+ call void asm sideeffect "; use $0", "v"(i32 %v248)
+ call void asm sideeffect "; use $0", "v"(i32 %v249)
+ call void asm sideeffect "; use $0", "v"(i32 %v250)
+ call void asm sideeffect "; use $0", "v"(i32 %v251)
+ call void asm sideeffect "; use $0", "v"(i32 %v252)
+ call void asm sideeffect "; use $0", "v"(i32 %v253)
+ call void asm sideeffect "; use $0", "v"(i32 %v254)
+ call void asm sideeffect "; use $0", "v"(i32 %v255)
+ ret void
+}
+
define internal void @use512vgprs() {
%v0 = call <32 x i32> asm sideeffect "; def $0", "=v"()
%v1 = call <32 x i32> asm sideeffect "; def $0", "=v"()
@@ -560,12 +1076,13 @@ define amdgpu_kernel void @k256_w8() #2568 {
; GFX90A: NumVgprs: 64
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 64
-define amdgpu_kernel void @k256_w8_no_agprs() #2568 {
+define amdgpu_kernel void @k256_w8_no_agprs() #2569 {
call void @use256vgprs()
ret void
}
attributes #2568 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" }
+attributes #2569 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" "amdgpu-no-agpr" }
; GCN-LABEL: {{^}}k256_w4:
; GFX90A: NumVgprs: 64
@@ -581,12 +1098,13 @@ define amdgpu_kernel void @k256_w4() #2564 {
; GFX90A: NumVgprs: 128
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k256_w4_no_agprs() #2564 {
+define amdgpu_kernel void @k256_w4_no_agprs() #2565 {
call void @use256vgprs()
ret void
}
attributes #2564 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" }
+attributes #2565 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" "amdgpu-no-agpr" }
; GCN-LABEL: {{^}}k256_w2:
; GFX90A: NumVgprs: 128
@@ -602,12 +1120,13 @@ define amdgpu_kernel void @k256_w2() #2562 {
; GFX90A: NumVgprs: 256
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k256_w2_no_agprs() #2562 {
+define amdgpu_kernel void @k256_w2_no_agprs() #2563 {
call void @use256vgprs()
ret void
}
attributes #2562 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" }
+attributes #2563 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
; GCN-LABEL: {{^}}k256_w1:
; GFX90A: NumVgprs: 256
@@ -634,7 +1153,7 @@ attributes #2561 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-wa
; GFX90A: NumVgprs: 256
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k512_no_agprs() #512 {
+define amdgpu_kernel void @k512_no_agprs() #513 {
call void @use256vgprs()
ret void
}
@@ -677,13 +1196,39 @@ define void @f512() #512 {
ret void
}
+; GCN-LABEL: {{^}}f512_no_agpr:
+; GFX90A: NumVgprs: 256
+; GFX90A: NumAgprs: 0
+define void @f512_no_agpr() #513 {
+ call void @use256vgprs_no_agpr()
+ ret void
+}
+
+; GCN-LABEL: {{^}}f512_no_agpr_ub:
+; GFX90A: NumVgprs: 256
+; GFX90A: NumAgprs: 0
+define void @f512_no_agpr_ub() #513 {
+ call void @use256vgprs()
+ ret void
+}
+
attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
+attributes #513 = { nounwind "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" }
; GCN-LABEL: {{^}}k1024:
+; GFX90A: NumVgprs: 64
+; GFX90A: NumAgprs: 64
+; GFX90A: TotalNumVgprs: 128
+define amdgpu_kernel void @k1024() #1024 {
+ call void @use256vgprs()
+ ret void
+}
+
+; GCN-LABEL: {{^}}k1024_no_agprs:
; GFX90A: NumVgprs: 128
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k1024() #1024 {
+define amdgpu_kernel void @k1024_no_agprs() #1025 {
call void @use256vgprs()
ret void
}
@@ -698,4 +1243,61 @@ define amdgpu_kernel void @k1024_call() #1024 {
ret void
}
+; GCN-LABEL: {{^}}k1024_call_no_agprs:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define amdgpu_kernel void @k1024_call_no_agprs() #1025 {
+ call void @use256vgprs()
+ ret void
+}
+
+; @foo uses an AGPR, so amdgpu-no-agpr here is undefined behavior
+; GCN-LABEL: {{^}}k1024_call_no_agprs_ub_callee:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 1
+; GFX90A: TotalNumVgprs: 129
+define amdgpu_kernel void @k1024_call_no_agprs_ub_callee() #1025 {
+ call void @foo()
+ call void @use256vgprs()
+ ret void
+}
+
+; GCN-LABEL: {{^}}f1024_0:
+; GFX90A: NumVgprs: 32
+; GFX90A: NumAgprs: 1
+; GFX90A: TotalNumVgprs: 33
+define void @f1024_0() #1024 {
+ call void @foo()
+ ret void
+}
+
+; GCN-LABEL: {{^}}f1024_1:
+; GFX90A: NumVgprs: 64
+; GFX90A: NumAgprs: 32
+; GFX90A: TotalNumVgprs: 96
+define void @f1024_1() #1024 {
+ call void @use256vgprs()
+ ret void
+}
+
+; GCN-LABEL: {{^}}f1024_call_no_agprs:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define void @f1024_call_no_agprs() #1025 {
+ call void @use256vgprs_no_agpr()
+ ret void
+}
+
+; GCN-LABEL: {{^}}f1024_call_no_agprs_ub:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define void @f1024_call_no_agprs_ub() #1025 {
+ call void @use256vgprs()
+ ret void
+}
+
attributes #1024 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" }
+attributes #1025 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-no-agpr" }
More information about the llvm-commits
mailing list