[llvm] ccad5e7 - AMDGPU: Respect amdgpu-no-agpr in functions and with calls (#128147)

via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 22 18:00:41 PST 2025


Author: Matt Arsenault
Date: 2025-02-23T09:00:37+07:00
New Revision: ccad5e77442f7f237939395ebce1ae7adf187380

URL: https://github.com/llvm/llvm-project/commit/ccad5e77442f7f237939395ebce1ae7adf187380
DIFF: https://github.com/llvm/llvm-project/commit/ccad5e77442f7f237939395ebce1ae7adf187380.diff

LOG: AMDGPU: Respect amdgpu-no-agpr in functions and with calls (#128147)

Remove the MIR scan to detect whether AGPRs are used or not,
and the special case for callable functions. This behavior was
confusing, and not overridable. The amdgpu-no-agpr attribute was
intended to avoid this imprecise heuristic for how many AGPRs to
allocate. It was also too confusing to make this interact with
the pending amdgpu-num-agpr replacement for amdgpu-no-agpr.

Also adds an xfail-ish test for a case I ran into, where the register
allocator asserts after allocation fails.

Future work should reintroduce a more refined MIR scan that estimates
AGPR pressure in order to decide how to split the register file
between AGPRs and VGPRs.

Added: 
    llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
    llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
    llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
    llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c5efb89d8b2db..a83fc2d188de2 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -64,6 +64,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
   }
 
   MayNeedAGPRs = ST.hasMAIInsts();
+  if (ST.hasGFX90AInsts() &&
+      ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
+      !mayUseAGPRs(F))
+    MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
 
   if (AMDGPU::isChainCC(CC)) {
     // Chain functions don't receive an SP from their caller, but are free to
@@ -98,13 +102,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
       ImplicitArgPtr = true;
   } else {
     ImplicitArgPtr = false;
-    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
-                               MaxKernArgAlign);
-
-    if (ST.hasGFX90AInsts() &&
-        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
-        !mayUseAGPRs(F))
-      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
+    MaxKernArgAlign =
+        std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
   }
 
   if (!AMDGPU::isGraphics(CC) ||
@@ -783,44 +782,3 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
 bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
   return !F.hasFnAttribute("amdgpu-no-agpr");
 }
-
-bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
-  if (UsesAGPRs)
-    return *UsesAGPRs;
-
-  if (!mayNeedAGPRs()) {
-    UsesAGPRs = false;
-    return false;
-  }
-
-  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
-      MF.getFrameInfo().hasCalls()) {
-    UsesAGPRs = true;
-    return true;
-  }
-
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
-    const Register Reg = Register::index2VirtReg(I);
-    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
-    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
-      UsesAGPRs = true;
-      return true;
-    }
-    if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
-      // Defer caching UsesAGPRs, function might not yet been regbank selected.
-      return true;
-    }
-  }
-
-  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
-    if (MRI.isPhysRegUsed(Reg)) {
-      UsesAGPRs = true;
-      return true;
-    }
-  }
-
-  UsesAGPRs = false;
-  return false;
-}

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 2e2716f1ce888..740f752bc93b7 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -494,8 +494,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   // scheduler stage.
   unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
 
-  mutable std::optional<bool> UsesAGPRs;
-
   MCPhysReg getNextUserSGPR() const;
 
   MCPhysReg getNextSystemSGPR() const;
@@ -1126,9 +1124,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   // has a call which may use it.
   bool mayUseAGPRs(const Function &F) const;
 
-  // \returns true if a function needs or may need AGPRs.
-  bool usesAGPRs(const MachineFunction &MF) const;
-
   /// \returns Default/requested number of work groups for this function.
   SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 71c720ed09b5f..924aa45559366 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -585,7 +585,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
   // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
   //       register file accordingly.
   if (ST.hasGFX90AInsts()) {
-    if (MFI->usesAGPRs(MF)) {
+    if (MFI->mayNeedAGPRs()) {
       MaxNumVGPRs /= 2;
       MaxNumAGPRs = MaxNumVGPRs;
     } else {

diff  --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
new file mode 100644
index 0000000000000..5f2e9af378f08
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CRASH %s
+
+; CRASH: error: <unknown>:0:0: no registers from class available to allocate in function 'no_free_vgprs_at_agpr_to_agpr_copy'
+; CRASH: Cannot access invalid iterator
+
+define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
+  %asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1", "=${v[0:31]},=${a[0:15]}"()
+  %vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0
+  %agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1
+  %mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0)
+  %agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0)
+  %agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma)
+  call void asm sideeffect "; use $0 $1", "{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0)
+  ret void
+}
+
+declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
+declare noundef i32 @llvm.amdgcn.workitem.id.x() #2
+
+attributes #0 = { "amdgpu-no-agpr" "amdgpu-waves-per-eu"="6,6" }
+attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

diff  --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 4ce46bbaf45ac..d1b01eeee11a4 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -240,7 +240,7 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
 }
 
 ; Check that we do make use of v32 if there are no AGPRs present in the function
-define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #5 {
 ; GFX908-LABEL: no_agpr_no_reserve:
 ; GFX908:       ; %bb.0:
 ; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
@@ -1144,5 +1144,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
 attributes #0 = { "amdgpu-waves-per-eu"="6,6" }
 attributes #1 = { convergent nounwind readnone willreturn }
 attributes #2 = { nounwind readnone willreturn }
-attributes #3 = { "amdgpu-waves-per-eu"="7,7" }
+attributes #3 = { "amdgpu-waves-per-eu"="7,7" "amdgpu-no-agpr" }
 attributes #4 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-flat-work-group-size"="1024,1024" }
+attributes #5 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-no-agpr" }

diff  --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 322686b0144a0..f6f78f134fc1f 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -94,9 +94,20 @@ bb3:
   ret void
 }
 
-; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_noagpr:
+; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
+; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
+define void @test_mfma_f32_32x32x1f32_nonentry_noagpr(ptr addrspace(1) %arg) #0 {
+bb:
+  %in.1 = load <32 x float>, ptr addrspace(1) %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
+  store <32 x float> %mai.1, ptr addrspace(1) %arg
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_with_agpr:
 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) #0 {
+define void @test_mfma_f32_32x32x1f32_nonentry_with_agpr(ptr addrspace(1) %arg) #3 {
 bb:
   %in.1 = load <32 x float>, ptr addrspace(1) %arg
   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -109,3 +120,4 @@ declare void @foo()
 attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
 attributes #1 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
 attributes #2 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" }
+attributes #3 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
index ed57caadea5c5..583d1f9e4a9fd 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
@@ -6,7 +6,7 @@
     ret void
   }
 
-  attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+  attributes #0 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-no-agpr" }
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
index 040799435db4a..03978e68e81b4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
@@ -518,6 +518,522 @@ define internal void @use256vgprs() {
   ret void
 }
 
+define internal void @use256vgprs_no_agpr() "amdgpu-no-agpr" {
+  %v0 = call i32 asm sideeffect "; def $0", "=v"()
+  %v1 = call i32 asm sideeffect "; def $0", "=v"()
+  %v2 = call i32 asm sideeffect "; def $0", "=v"()
+  %v3 = call i32 asm sideeffect "; def $0", "=v"()
+  %v4 = call i32 asm sideeffect "; def $0", "=v"()
+  %v5 = call i32 asm sideeffect "; def $0", "=v"()
+  %v6 = call i32 asm sideeffect "; def $0", "=v"()
+  %v7 = call i32 asm sideeffect "; def $0", "=v"()
+  %v8 = call i32 asm sideeffect "; def $0", "=v"()
+  %v9 = call i32 asm sideeffect "; def $0", "=v"()
+  %v10 = call i32 asm sideeffect "; def $0", "=v"()
+  %v11 = call i32 asm sideeffect "; def $0", "=v"()
+  %v12 = call i32 asm sideeffect "; def $0", "=v"()
+  %v13 = call i32 asm sideeffect "; def $0", "=v"()
+  %v14 = call i32 asm sideeffect "; def $0", "=v"()
+  %v15 = call i32 asm sideeffect "; def $0", "=v"()
+  %v16 = call i32 asm sideeffect "; def $0", "=v"()
+  %v17 = call i32 asm sideeffect "; def $0", "=v"()
+  %v18 = call i32 asm sideeffect "; def $0", "=v"()
+  %v19 = call i32 asm sideeffect "; def $0", "=v"()
+  %v20 = call i32 asm sideeffect "; def $0", "=v"()
+  %v21 = call i32 asm sideeffect "; def $0", "=v"()
+  %v22 = call i32 asm sideeffect "; def $0", "=v"()
+  %v23 = call i32 asm sideeffect "; def $0", "=v"()
+  %v24 = call i32 asm sideeffect "; def $0", "=v"()
+  %v25 = call i32 asm sideeffect "; def $0", "=v"()
+  %v26 = call i32 asm sideeffect "; def $0", "=v"()
+  %v27 = call i32 asm sideeffect "; def $0", "=v"()
+  %v28 = call i32 asm sideeffect "; def $0", "=v"()
+  %v29 = call i32 asm sideeffect "; def $0", "=v"()
+  %v30 = call i32 asm sideeffect "; def $0", "=v"()
+  %v31 = call i32 asm sideeffect "; def $0", "=v"()
+  %v32 = call i32 asm sideeffect "; def $0", "=v"()
+  %v33 = call i32 asm sideeffect "; def $0", "=v"()
+  %v34 = call i32 asm sideeffect "; def $0", "=v"()
+  %v35 = call i32 asm sideeffect "; def $0", "=v"()
+  %v36 = call i32 asm sideeffect "; def $0", "=v"()
+  %v37 = call i32 asm sideeffect "; def $0", "=v"()
+  %v38 = call i32 asm sideeffect "; def $0", "=v"()
+  %v39 = call i32 asm sideeffect "; def $0", "=v"()
+  %v40 = call i32 asm sideeffect "; def $0", "=v"()
+  %v41 = call i32 asm sideeffect "; def $0", "=v"()
+  %v42 = call i32 asm sideeffect "; def $0", "=v"()
+  %v43 = call i32 asm sideeffect "; def $0", "=v"()
+  %v44 = call i32 asm sideeffect "; def $0", "=v"()
+  %v45 = call i32 asm sideeffect "; def $0", "=v"()
+  %v46 = call i32 asm sideeffect "; def $0", "=v"()
+  %v47 = call i32 asm sideeffect "; def $0", "=v"()
+  %v48 = call i32 asm sideeffect "; def $0", "=v"()
+  %v49 = call i32 asm sideeffect "; def $0", "=v"()
+  %v50 = call i32 asm sideeffect "; def $0", "=v"()
+  %v51 = call i32 asm sideeffect "; def $0", "=v"()
+  %v52 = call i32 asm sideeffect "; def $0", "=v"()
+  %v53 = call i32 asm sideeffect "; def $0", "=v"()
+  %v54 = call i32 asm sideeffect "; def $0", "=v"()
+  %v55 = call i32 asm sideeffect "; def $0", "=v"()
+  %v56 = call i32 asm sideeffect "; def $0", "=v"()
+  %v57 = call i32 asm sideeffect "; def $0", "=v"()
+  %v58 = call i32 asm sideeffect "; def $0", "=v"()
+  %v59 = call i32 asm sideeffect "; def $0", "=v"()
+  %v60 = call i32 asm sideeffect "; def $0", "=v"()
+  %v61 = call i32 asm sideeffect "; def $0", "=v"()
+  %v62 = call i32 asm sideeffect "; def $0", "=v"()
+  %v63 = call i32 asm sideeffect "; def $0", "=v"()
+  %v64 = call i32 asm sideeffect "; def $0", "=v"()
+  %v65 = call i32 asm sideeffect "; def $0", "=v"()
+  %v66 = call i32 asm sideeffect "; def $0", "=v"()
+  %v67 = call i32 asm sideeffect "; def $0", "=v"()
+  %v68 = call i32 asm sideeffect "; def $0", "=v"()
+  %v69 = call i32 asm sideeffect "; def $0", "=v"()
+  %v70 = call i32 asm sideeffect "; def $0", "=v"()
+  %v71 = call i32 asm sideeffect "; def $0", "=v"()
+  %v72 = call i32 asm sideeffect "; def $0", "=v"()
+  %v73 = call i32 asm sideeffect "; def $0", "=v"()
+  %v74 = call i32 asm sideeffect "; def $0", "=v"()
+  %v75 = call i32 asm sideeffect "; def $0", "=v"()
+  %v76 = call i32 asm sideeffect "; def $0", "=v"()
+  %v77 = call i32 asm sideeffect "; def $0", "=v"()
+  %v78 = call i32 asm sideeffect "; def $0", "=v"()
+  %v79 = call i32 asm sideeffect "; def $0", "=v"()
+  %v80 = call i32 asm sideeffect "; def $0", "=v"()
+  %v81 = call i32 asm sideeffect "; def $0", "=v"()
+  %v82 = call i32 asm sideeffect "; def $0", "=v"()
+  %v83 = call i32 asm sideeffect "; def $0", "=v"()
+  %v84 = call i32 asm sideeffect "; def $0", "=v"()
+  %v85 = call i32 asm sideeffect "; def $0", "=v"()
+  %v86 = call i32 asm sideeffect "; def $0", "=v"()
+  %v87 = call i32 asm sideeffect "; def $0", "=v"()
+  %v88 = call i32 asm sideeffect "; def $0", "=v"()
+  %v89 = call i32 asm sideeffect "; def $0", "=v"()
+  %v90 = call i32 asm sideeffect "; def $0", "=v"()
+  %v91 = call i32 asm sideeffect "; def $0", "=v"()
+  %v92 = call i32 asm sideeffect "; def $0", "=v"()
+  %v93 = call i32 asm sideeffect "; def $0", "=v"()
+  %v94 = call i32 asm sideeffect "; def $0", "=v"()
+  %v95 = call i32 asm sideeffect "; def $0", "=v"()
+  %v96 = call i32 asm sideeffect "; def $0", "=v"()
+  %v97 = call i32 asm sideeffect "; def $0", "=v"()
+  %v98 = call i32 asm sideeffect "; def $0", "=v"()
+  %v99 = call i32 asm sideeffect "; def $0", "=v"()
+  %v100 = call i32 asm sideeffect "; def $0", "=v"()
+  %v101 = call i32 asm sideeffect "; def $0", "=v"()
+  %v102 = call i32 asm sideeffect "; def $0", "=v"()
+  %v103 = call i32 asm sideeffect "; def $0", "=v"()
+  %v104 = call i32 asm sideeffect "; def $0", "=v"()
+  %v105 = call i32 asm sideeffect "; def $0", "=v"()
+  %v106 = call i32 asm sideeffect "; def $0", "=v"()
+  %v107 = call i32 asm sideeffect "; def $0", "=v"()
+  %v108 = call i32 asm sideeffect "; def $0", "=v"()
+  %v109 = call i32 asm sideeffect "; def $0", "=v"()
+  %v110 = call i32 asm sideeffect "; def $0", "=v"()
+  %v111 = call i32 asm sideeffect "; def $0", "=v"()
+  %v112 = call i32 asm sideeffect "; def $0", "=v"()
+  %v113 = call i32 asm sideeffect "; def $0", "=v"()
+  %v114 = call i32 asm sideeffect "; def $0", "=v"()
+  %v115 = call i32 asm sideeffect "; def $0", "=v"()
+  %v116 = call i32 asm sideeffect "; def $0", "=v"()
+  %v117 = call i32 asm sideeffect "; def $0", "=v"()
+  %v118 = call i32 asm sideeffect "; def $0", "=v"()
+  %v119 = call i32 asm sideeffect "; def $0", "=v"()
+  %v120 = call i32 asm sideeffect "; def $0", "=v"()
+  %v121 = call i32 asm sideeffect "; def $0", "=v"()
+  %v122 = call i32 asm sideeffect "; def $0", "=v"()
+  %v123 = call i32 asm sideeffect "; def $0", "=v"()
+  %v124 = call i32 asm sideeffect "; def $0", "=v"()
+  %v125 = call i32 asm sideeffect "; def $0", "=v"()
+  %v126 = call i32 asm sideeffect "; def $0", "=v"()
+  %v127 = call i32 asm sideeffect "; def $0", "=v"()
+  %v128 = call i32 asm sideeffect "; def $0", "=v"()
+  %v129 = call i32 asm sideeffect "; def $0", "=v"()
+  %v130 = call i32 asm sideeffect "; def $0", "=v"()
+  %v131 = call i32 asm sideeffect "; def $0", "=v"()
+  %v132 = call i32 asm sideeffect "; def $0", "=v"()
+  %v133 = call i32 asm sideeffect "; def $0", "=v"()
+  %v134 = call i32 asm sideeffect "; def $0", "=v"()
+  %v135 = call i32 asm sideeffect "; def $0", "=v"()
+  %v136 = call i32 asm sideeffect "; def $0", "=v"()
+  %v137 = call i32 asm sideeffect "; def $0", "=v"()
+  %v138 = call i32 asm sideeffect "; def $0", "=v"()
+  %v139 = call i32 asm sideeffect "; def $0", "=v"()
+  %v140 = call i32 asm sideeffect "; def $0", "=v"()
+  %v141 = call i32 asm sideeffect "; def $0", "=v"()
+  %v142 = call i32 asm sideeffect "; def $0", "=v"()
+  %v143 = call i32 asm sideeffect "; def $0", "=v"()
+  %v144 = call i32 asm sideeffect "; def $0", "=v"()
+  %v145 = call i32 asm sideeffect "; def $0", "=v"()
+  %v146 = call i32 asm sideeffect "; def $0", "=v"()
+  %v147 = call i32 asm sideeffect "; def $0", "=v"()
+  %v148 = call i32 asm sideeffect "; def $0", "=v"()
+  %v149 = call i32 asm sideeffect "; def $0", "=v"()
+  %v150 = call i32 asm sideeffect "; def $0", "=v"()
+  %v151 = call i32 asm sideeffect "; def $0", "=v"()
+  %v152 = call i32 asm sideeffect "; def $0", "=v"()
+  %v153 = call i32 asm sideeffect "; def $0", "=v"()
+  %v154 = call i32 asm sideeffect "; def $0", "=v"()
+  %v155 = call i32 asm sideeffect "; def $0", "=v"()
+  %v156 = call i32 asm sideeffect "; def $0", "=v"()
+  %v157 = call i32 asm sideeffect "; def $0", "=v"()
+  %v158 = call i32 asm sideeffect "; def $0", "=v"()
+  %v159 = call i32 asm sideeffect "; def $0", "=v"()
+  %v160 = call i32 asm sideeffect "; def $0", "=v"()
+  %v161 = call i32 asm sideeffect "; def $0", "=v"()
+  %v162 = call i32 asm sideeffect "; def $0", "=v"()
+  %v163 = call i32 asm sideeffect "; def $0", "=v"()
+  %v164 = call i32 asm sideeffect "; def $0", "=v"()
+  %v165 = call i32 asm sideeffect "; def $0", "=v"()
+  %v166 = call i32 asm sideeffect "; def $0", "=v"()
+  %v167 = call i32 asm sideeffect "; def $0", "=v"()
+  %v168 = call i32 asm sideeffect "; def $0", "=v"()
+  %v169 = call i32 asm sideeffect "; def $0", "=v"()
+  %v170 = call i32 asm sideeffect "; def $0", "=v"()
+  %v171 = call i32 asm sideeffect "; def $0", "=v"()
+  %v172 = call i32 asm sideeffect "; def $0", "=v"()
+  %v173 = call i32 asm sideeffect "; def $0", "=v"()
+  %v174 = call i32 asm sideeffect "; def $0", "=v"()
+  %v175 = call i32 asm sideeffect "; def $0", "=v"()
+  %v176 = call i32 asm sideeffect "; def $0", "=v"()
+  %v177 = call i32 asm sideeffect "; def $0", "=v"()
+  %v178 = call i32 asm sideeffect "; def $0", "=v"()
+  %v179 = call i32 asm sideeffect "; def $0", "=v"()
+  %v180 = call i32 asm sideeffect "; def $0", "=v"()
+  %v181 = call i32 asm sideeffect "; def $0", "=v"()
+  %v182 = call i32 asm sideeffect "; def $0", "=v"()
+  %v183 = call i32 asm sideeffect "; def $0", "=v"()
+  %v184 = call i32 asm sideeffect "; def $0", "=v"()
+  %v185 = call i32 asm sideeffect "; def $0", "=v"()
+  %v186 = call i32 asm sideeffect "; def $0", "=v"()
+  %v187 = call i32 asm sideeffect "; def $0", "=v"()
+  %v188 = call i32 asm sideeffect "; def $0", "=v"()
+  %v189 = call i32 asm sideeffect "; def $0", "=v"()
+  %v190 = call i32 asm sideeffect "; def $0", "=v"()
+  %v191 = call i32 asm sideeffect "; def $0", "=v"()
+  %v192 = call i32 asm sideeffect "; def $0", "=v"()
+  %v193 = call i32 asm sideeffect "; def $0", "=v"()
+  %v194 = call i32 asm sideeffect "; def $0", "=v"()
+  %v195 = call i32 asm sideeffect "; def $0", "=v"()
+  %v196 = call i32 asm sideeffect "; def $0", "=v"()
+  %v197 = call i32 asm sideeffect "; def $0", "=v"()
+  %v198 = call i32 asm sideeffect "; def $0", "=v"()
+  %v199 = call i32 asm sideeffect "; def $0", "=v"()
+  %v200 = call i32 asm sideeffect "; def $0", "=v"()
+  %v201 = call i32 asm sideeffect "; def $0", "=v"()
+  %v202 = call i32 asm sideeffect "; def $0", "=v"()
+  %v203 = call i32 asm sideeffect "; def $0", "=v"()
+  %v204 = call i32 asm sideeffect "; def $0", "=v"()
+  %v205 = call i32 asm sideeffect "; def $0", "=v"()
+  %v206 = call i32 asm sideeffect "; def $0", "=v"()
+  %v207 = call i32 asm sideeffect "; def $0", "=v"()
+  %v208 = call i32 asm sideeffect "; def $0", "=v"()
+  %v209 = call i32 asm sideeffect "; def $0", "=v"()
+  %v210 = call i32 asm sideeffect "; def $0", "=v"()
+  %v211 = call i32 asm sideeffect "; def $0", "=v"()
+  %v212 = call i32 asm sideeffect "; def $0", "=v"()
+  %v213 = call i32 asm sideeffect "; def $0", "=v"()
+  %v214 = call i32 asm sideeffect "; def $0", "=v"()
+  %v215 = call i32 asm sideeffect "; def $0", "=v"()
+  %v216 = call i32 asm sideeffect "; def $0", "=v"()
+  %v217 = call i32 asm sideeffect "; def $0", "=v"()
+  %v218 = call i32 asm sideeffect "; def $0", "=v"()
+  %v219 = call i32 asm sideeffect "; def $0", "=v"()
+  %v220 = call i32 asm sideeffect "; def $0", "=v"()
+  %v221 = call i32 asm sideeffect "; def $0", "=v"()
+  %v222 = call i32 asm sideeffect "; def $0", "=v"()
+  %v223 = call i32 asm sideeffect "; def $0", "=v"()
+  %v224 = call i32 asm sideeffect "; def $0", "=v"()
+  %v225 = call i32 asm sideeffect "; def $0", "=v"()
+  %v226 = call i32 asm sideeffect "; def $0", "=v"()
+  %v227 = call i32 asm sideeffect "; def $0", "=v"()
+  %v228 = call i32 asm sideeffect "; def $0", "=v"()
+  %v229 = call i32 asm sideeffect "; def $0", "=v"()
+  %v230 = call i32 asm sideeffect "; def $0", "=v"()
+  %v231 = call i32 asm sideeffect "; def $0", "=v"()
+  %v232 = call i32 asm sideeffect "; def $0", "=v"()
+  %v233 = call i32 asm sideeffect "; def $0", "=v"()
+  %v234 = call i32 asm sideeffect "; def $0", "=v"()
+  %v235 = call i32 asm sideeffect "; def $0", "=v"()
+  %v236 = call i32 asm sideeffect "; def $0", "=v"()
+  %v237 = call i32 asm sideeffect "; def $0", "=v"()
+  %v238 = call i32 asm sideeffect "; def $0", "=v"()
+  %v239 = call i32 asm sideeffect "; def $0", "=v"()
+  %v240 = call i32 asm sideeffect "; def $0", "=v"()
+  %v241 = call i32 asm sideeffect "; def $0", "=v"()
+  %v242 = call i32 asm sideeffect "; def $0", "=v"()
+  %v243 = call i32 asm sideeffect "; def $0", "=v"()
+  %v244 = call i32 asm sideeffect "; def $0", "=v"()
+  %v245 = call i32 asm sideeffect "; def $0", "=v"()
+  %v246 = call i32 asm sideeffect "; def $0", "=v"()
+  %v247 = call i32 asm sideeffect "; def $0", "=v"()
+  %v248 = call i32 asm sideeffect "; def $0", "=v"()
+  %v249 = call i32 asm sideeffect "; def $0", "=v"()
+  %v250 = call i32 asm sideeffect "; def $0", "=v"()
+  %v251 = call i32 asm sideeffect "; def $0", "=v"()
+  %v252 = call i32 asm sideeffect "; def $0", "=v"()
+  %v253 = call i32 asm sideeffect "; def $0", "=v"()
+  %v254 = call i32 asm sideeffect "; def $0", "=v"()
+  %v255 = call i32 asm sideeffect "; def $0", "=v"()
+  call void asm sideeffect "; use $0", "v"(i32 %v0)
+  call void asm sideeffect "; use $0", "v"(i32 %v1)
+  call void asm sideeffect "; use $0", "v"(i32 %v2)
+  call void asm sideeffect "; use $0", "v"(i32 %v3)
+  call void asm sideeffect "; use $0", "v"(i32 %v4)
+  call void asm sideeffect "; use $0", "v"(i32 %v5)
+  call void asm sideeffect "; use $0", "v"(i32 %v6)
+  call void asm sideeffect "; use $0", "v"(i32 %v7)
+  call void asm sideeffect "; use $0", "v"(i32 %v8)
+  call void asm sideeffect "; use $0", "v"(i32 %v9)
+  call void asm sideeffect "; use $0", "v"(i32 %v10)
+  call void asm sideeffect "; use $0", "v"(i32 %v11)
+  call void asm sideeffect "; use $0", "v"(i32 %v12)
+  call void asm sideeffect "; use $0", "v"(i32 %v13)
+  call void asm sideeffect "; use $0", "v"(i32 %v14)
+  call void asm sideeffect "; use $0", "v"(i32 %v15)
+  call void asm sideeffect "; use $0", "v"(i32 %v16)
+  call void asm sideeffect "; use $0", "v"(i32 %v17)
+  call void asm sideeffect "; use $0", "v"(i32 %v18)
+  call void asm sideeffect "; use $0", "v"(i32 %v19)
+  call void asm sideeffect "; use $0", "v"(i32 %v20)
+  call void asm sideeffect "; use $0", "v"(i32 %v21)
+  call void asm sideeffect "; use $0", "v"(i32 %v22)
+  call void asm sideeffect "; use $0", "v"(i32 %v23)
+  call void asm sideeffect "; use $0", "v"(i32 %v24)
+  call void asm sideeffect "; use $0", "v"(i32 %v25)
+  call void asm sideeffect "; use $0", "v"(i32 %v26)
+  call void asm sideeffect "; use $0", "v"(i32 %v27)
+  call void asm sideeffect "; use $0", "v"(i32 %v28)
+  call void asm sideeffect "; use $0", "v"(i32 %v29)
+  call void asm sideeffect "; use $0", "v"(i32 %v30)
+  call void asm sideeffect "; use $0", "v"(i32 %v31)
+  call void asm sideeffect "; use $0", "v"(i32 %v32)
+  call void asm sideeffect "; use $0", "v"(i32 %v33)
+  call void asm sideeffect "; use $0", "v"(i32 %v34)
+  call void asm sideeffect "; use $0", "v"(i32 %v35)
+  call void asm sideeffect "; use $0", "v"(i32 %v36)
+  call void asm sideeffect "; use $0", "v"(i32 %v37)
+  call void asm sideeffect "; use $0", "v"(i32 %v38)
+  call void asm sideeffect "; use $0", "v"(i32 %v39)
+  call void asm sideeffect "; use $0", "v"(i32 %v40)
+  call void asm sideeffect "; use $0", "v"(i32 %v41)
+  call void asm sideeffect "; use $0", "v"(i32 %v42)
+  call void asm sideeffect "; use $0", "v"(i32 %v43)
+  call void asm sideeffect "; use $0", "v"(i32 %v44)
+  call void asm sideeffect "; use $0", "v"(i32 %v45)
+  call void asm sideeffect "; use $0", "v"(i32 %v46)
+  call void asm sideeffect "; use $0", "v"(i32 %v47)
+  call void asm sideeffect "; use $0", "v"(i32 %v48)
+  call void asm sideeffect "; use $0", "v"(i32 %v49)
+  call void asm sideeffect "; use $0", "v"(i32 %v50)
+  call void asm sideeffect "; use $0", "v"(i32 %v51)
+  call void asm sideeffect "; use $0", "v"(i32 %v52)
+  call void asm sideeffect "; use $0", "v"(i32 %v53)
+  call void asm sideeffect "; use $0", "v"(i32 %v54)
+  call void asm sideeffect "; use $0", "v"(i32 %v55)
+  call void asm sideeffect "; use $0", "v"(i32 %v56)
+  call void asm sideeffect "; use $0", "v"(i32 %v57)
+  call void asm sideeffect "; use $0", "v"(i32 %v58)
+  call void asm sideeffect "; use $0", "v"(i32 %v59)
+  call void asm sideeffect "; use $0", "v"(i32 %v60)
+  call void asm sideeffect "; use $0", "v"(i32 %v61)
+  call void asm sideeffect "; use $0", "v"(i32 %v62)
+  call void asm sideeffect "; use $0", "v"(i32 %v63)
+  call void asm sideeffect "; use $0", "v"(i32 %v64)
+  call void asm sideeffect "; use $0", "v"(i32 %v65)
+  call void asm sideeffect "; use $0", "v"(i32 %v66)
+  call void asm sideeffect "; use $0", "v"(i32 %v67)
+  call void asm sideeffect "; use $0", "v"(i32 %v68)
+  call void asm sideeffect "; use $0", "v"(i32 %v69)
+  call void asm sideeffect "; use $0", "v"(i32 %v70)
+  call void asm sideeffect "; use $0", "v"(i32 %v71)
+  call void asm sideeffect "; use $0", "v"(i32 %v72)
+  call void asm sideeffect "; use $0", "v"(i32 %v73)
+  call void asm sideeffect "; use $0", "v"(i32 %v74)
+  call void asm sideeffect "; use $0", "v"(i32 %v75)
+  call void asm sideeffect "; use $0", "v"(i32 %v76)
+  call void asm sideeffect "; use $0", "v"(i32 %v77)
+  call void asm sideeffect "; use $0", "v"(i32 %v78)
+  call void asm sideeffect "; use $0", "v"(i32 %v79)
+  call void asm sideeffect "; use $0", "v"(i32 %v80)
+  call void asm sideeffect "; use $0", "v"(i32 %v81)
+  call void asm sideeffect "; use $0", "v"(i32 %v82)
+  call void asm sideeffect "; use $0", "v"(i32 %v83)
+  call void asm sideeffect "; use $0", "v"(i32 %v84)
+  call void asm sideeffect "; use $0", "v"(i32 %v85)
+  call void asm sideeffect "; use $0", "v"(i32 %v86)
+  call void asm sideeffect "; use $0", "v"(i32 %v87)
+  call void asm sideeffect "; use $0", "v"(i32 %v88)
+  call void asm sideeffect "; use $0", "v"(i32 %v89)
+  call void asm sideeffect "; use $0", "v"(i32 %v90)
+  call void asm sideeffect "; use $0", "v"(i32 %v91)
+  call void asm sideeffect "; use $0", "v"(i32 %v92)
+  call void asm sideeffect "; use $0", "v"(i32 %v93)
+  call void asm sideeffect "; use $0", "v"(i32 %v94)
+  call void asm sideeffect "; use $0", "v"(i32 %v95)
+  call void asm sideeffect "; use $0", "v"(i32 %v96)
+  call void asm sideeffect "; use $0", "v"(i32 %v97)
+  call void asm sideeffect "; use $0", "v"(i32 %v98)
+  call void asm sideeffect "; use $0", "v"(i32 %v99)
+  call void asm sideeffect "; use $0", "v"(i32 %v100)
+  call void asm sideeffect "; use $0", "v"(i32 %v101)
+  call void asm sideeffect "; use $0", "v"(i32 %v102)
+  call void asm sideeffect "; use $0", "v"(i32 %v103)
+  call void asm sideeffect "; use $0", "v"(i32 %v104)
+  call void asm sideeffect "; use $0", "v"(i32 %v105)
+  call void asm sideeffect "; use $0", "v"(i32 %v106)
+  call void asm sideeffect "; use $0", "v"(i32 %v107)
+  call void asm sideeffect "; use $0", "v"(i32 %v108)
+  call void asm sideeffect "; use $0", "v"(i32 %v109)
+  call void asm sideeffect "; use $0", "v"(i32 %v110)
+  call void asm sideeffect "; use $0", "v"(i32 %v111)
+  call void asm sideeffect "; use $0", "v"(i32 %v112)
+  call void asm sideeffect "; use $0", "v"(i32 %v113)
+  call void asm sideeffect "; use $0", "v"(i32 %v114)
+  call void asm sideeffect "; use $0", "v"(i32 %v115)
+  call void asm sideeffect "; use $0", "v"(i32 %v116)
+  call void asm sideeffect "; use $0", "v"(i32 %v117)
+  call void asm sideeffect "; use $0", "v"(i32 %v118)
+  call void asm sideeffect "; use $0", "v"(i32 %v119)
+  call void asm sideeffect "; use $0", "v"(i32 %v120)
+  call void asm sideeffect "; use $0", "v"(i32 %v121)
+  call void asm sideeffect "; use $0", "v"(i32 %v122)
+  call void asm sideeffect "; use $0", "v"(i32 %v123)
+  call void asm sideeffect "; use $0", "v"(i32 %v124)
+  call void asm sideeffect "; use $0", "v"(i32 %v125)
+  call void asm sideeffect "; use $0", "v"(i32 %v126)
+  call void asm sideeffect "; use $0", "v"(i32 %v127)
+  call void asm sideeffect "; use $0", "v"(i32 %v128)
+  call void asm sideeffect "; use $0", "v"(i32 %v129)
+  call void asm sideeffect "; use $0", "v"(i32 %v130)
+  call void asm sideeffect "; use $0", "v"(i32 %v131)
+  call void asm sideeffect "; use $0", "v"(i32 %v132)
+  call void asm sideeffect "; use $0", "v"(i32 %v133)
+  call void asm sideeffect "; use $0", "v"(i32 %v134)
+  call void asm sideeffect "; use $0", "v"(i32 %v135)
+  call void asm sideeffect "; use $0", "v"(i32 %v136)
+  call void asm sideeffect "; use $0", "v"(i32 %v137)
+  call void asm sideeffect "; use $0", "v"(i32 %v138)
+  call void asm sideeffect "; use $0", "v"(i32 %v139)
+  call void asm sideeffect "; use $0", "v"(i32 %v140)
+  call void asm sideeffect "; use $0", "v"(i32 %v141)
+  call void asm sideeffect "; use $0", "v"(i32 %v142)
+  call void asm sideeffect "; use $0", "v"(i32 %v143)
+  call void asm sideeffect "; use $0", "v"(i32 %v144)
+  call void asm sideeffect "; use $0", "v"(i32 %v145)
+  call void asm sideeffect "; use $0", "v"(i32 %v146)
+  call void asm sideeffect "; use $0", "v"(i32 %v147)
+  call void asm sideeffect "; use $0", "v"(i32 %v148)
+  call void asm sideeffect "; use $0", "v"(i32 %v149)
+  call void asm sideeffect "; use $0", "v"(i32 %v150)
+  call void asm sideeffect "; use $0", "v"(i32 %v151)
+  call void asm sideeffect "; use $0", "v"(i32 %v152)
+  call void asm sideeffect "; use $0", "v"(i32 %v153)
+  call void asm sideeffect "; use $0", "v"(i32 %v154)
+  call void asm sideeffect "; use $0", "v"(i32 %v155)
+  call void asm sideeffect "; use $0", "v"(i32 %v156)
+  call void asm sideeffect "; use $0", "v"(i32 %v157)
+  call void asm sideeffect "; use $0", "v"(i32 %v158)
+  call void asm sideeffect "; use $0", "v"(i32 %v159)
+  call void asm sideeffect "; use $0", "v"(i32 %v160)
+  call void asm sideeffect "; use $0", "v"(i32 %v161)
+  call void asm sideeffect "; use $0", "v"(i32 %v162)
+  call void asm sideeffect "; use $0", "v"(i32 %v163)
+  call void asm sideeffect "; use $0", "v"(i32 %v164)
+  call void asm sideeffect "; use $0", "v"(i32 %v165)
+  call void asm sideeffect "; use $0", "v"(i32 %v166)
+  call void asm sideeffect "; use $0", "v"(i32 %v167)
+  call void asm sideeffect "; use $0", "v"(i32 %v168)
+  call void asm sideeffect "; use $0", "v"(i32 %v169)
+  call void asm sideeffect "; use $0", "v"(i32 %v170)
+  call void asm sideeffect "; use $0", "v"(i32 %v171)
+  call void asm sideeffect "; use $0", "v"(i32 %v172)
+  call void asm sideeffect "; use $0", "v"(i32 %v173)
+  call void asm sideeffect "; use $0", "v"(i32 %v174)
+  call void asm sideeffect "; use $0", "v"(i32 %v175)
+  call void asm sideeffect "; use $0", "v"(i32 %v176)
+  call void asm sideeffect "; use $0", "v"(i32 %v177)
+  call void asm sideeffect "; use $0", "v"(i32 %v178)
+  call void asm sideeffect "; use $0", "v"(i32 %v179)
+  call void asm sideeffect "; use $0", "v"(i32 %v180)
+  call void asm sideeffect "; use $0", "v"(i32 %v181)
+  call void asm sideeffect "; use $0", "v"(i32 %v182)
+  call void asm sideeffect "; use $0", "v"(i32 %v183)
+  call void asm sideeffect "; use $0", "v"(i32 %v184)
+  call void asm sideeffect "; use $0", "v"(i32 %v185)
+  call void asm sideeffect "; use $0", "v"(i32 %v186)
+  call void asm sideeffect "; use $0", "v"(i32 %v187)
+  call void asm sideeffect "; use $0", "v"(i32 %v188)
+  call void asm sideeffect "; use $0", "v"(i32 %v189)
+  call void asm sideeffect "; use $0", "v"(i32 %v190)
+  call void asm sideeffect "; use $0", "v"(i32 %v191)
+  call void asm sideeffect "; use $0", "v"(i32 %v192)
+  call void asm sideeffect "; use $0", "v"(i32 %v193)
+  call void asm sideeffect "; use $0", "v"(i32 %v194)
+  call void asm sideeffect "; use $0", "v"(i32 %v195)
+  call void asm sideeffect "; use $0", "v"(i32 %v196)
+  call void asm sideeffect "; use $0", "v"(i32 %v197)
+  call void asm sideeffect "; use $0", "v"(i32 %v198)
+  call void asm sideeffect "; use $0", "v"(i32 %v199)
+  call void asm sideeffect "; use $0", "v"(i32 %v200)
+  call void asm sideeffect "; use $0", "v"(i32 %v201)
+  call void asm sideeffect "; use $0", "v"(i32 %v202)
+  call void asm sideeffect "; use $0", "v"(i32 %v203)
+  call void asm sideeffect "; use $0", "v"(i32 %v204)
+  call void asm sideeffect "; use $0", "v"(i32 %v205)
+  call void asm sideeffect "; use $0", "v"(i32 %v206)
+  call void asm sideeffect "; use $0", "v"(i32 %v207)
+  call void asm sideeffect "; use $0", "v"(i32 %v208)
+  call void asm sideeffect "; use $0", "v"(i32 %v209)
+  call void asm sideeffect "; use $0", "v"(i32 %v210)
+  call void asm sideeffect "; use $0", "v"(i32 %v211)
+  call void asm sideeffect "; use $0", "v"(i32 %v212)
+  call void asm sideeffect "; use $0", "v"(i32 %v213)
+  call void asm sideeffect "; use $0", "v"(i32 %v214)
+  call void asm sideeffect "; use $0", "v"(i32 %v215)
+  call void asm sideeffect "; use $0", "v"(i32 %v216)
+  call void asm sideeffect "; use $0", "v"(i32 %v217)
+  call void asm sideeffect "; use $0", "v"(i32 %v218)
+  call void asm sideeffect "; use $0", "v"(i32 %v219)
+  call void asm sideeffect "; use $0", "v"(i32 %v220)
+  call void asm sideeffect "; use $0", "v"(i32 %v221)
+  call void asm sideeffect "; use $0", "v"(i32 %v222)
+  call void asm sideeffect "; use $0", "v"(i32 %v223)
+  call void asm sideeffect "; use $0", "v"(i32 %v224)
+  call void asm sideeffect "; use $0", "v"(i32 %v225)
+  call void asm sideeffect "; use $0", "v"(i32 %v226)
+  call void asm sideeffect "; use $0", "v"(i32 %v227)
+  call void asm sideeffect "; use $0", "v"(i32 %v228)
+  call void asm sideeffect "; use $0", "v"(i32 %v229)
+  call void asm sideeffect "; use $0", "v"(i32 %v230)
+  call void asm sideeffect "; use $0", "v"(i32 %v231)
+  call void asm sideeffect "; use $0", "v"(i32 %v232)
+  call void asm sideeffect "; use $0", "v"(i32 %v233)
+  call void asm sideeffect "; use $0", "v"(i32 %v234)
+  call void asm sideeffect "; use $0", "v"(i32 %v235)
+  call void asm sideeffect "; use $0", "v"(i32 %v236)
+  call void asm sideeffect "; use $0", "v"(i32 %v237)
+  call void asm sideeffect "; use $0", "v"(i32 %v238)
+  call void asm sideeffect "; use $0", "v"(i32 %v239)
+  call void asm sideeffect "; use $0", "v"(i32 %v240)
+  call void asm sideeffect "; use $0", "v"(i32 %v241)
+  call void asm sideeffect "; use $0", "v"(i32 %v242)
+  call void asm sideeffect "; use $0", "v"(i32 %v243)
+  call void asm sideeffect "; use $0", "v"(i32 %v244)
+  call void asm sideeffect "; use $0", "v"(i32 %v245)
+  call void asm sideeffect "; use $0", "v"(i32 %v246)
+  call void asm sideeffect "; use $0", "v"(i32 %v247)
+  call void asm sideeffect "; use $0", "v"(i32 %v248)
+  call void asm sideeffect "; use $0", "v"(i32 %v249)
+  call void asm sideeffect "; use $0", "v"(i32 %v250)
+  call void asm sideeffect "; use $0", "v"(i32 %v251)
+  call void asm sideeffect "; use $0", "v"(i32 %v252)
+  call void asm sideeffect "; use $0", "v"(i32 %v253)
+  call void asm sideeffect "; use $0", "v"(i32 %v254)
+  call void asm sideeffect "; use $0", "v"(i32 %v255)
+  ret void
+}
+
 define internal void @use512vgprs() {
   %v0 = call <32 x i32> asm sideeffect "; def $0", "=v"()
   %v1 = call <32 x i32> asm sideeffect "; def $0", "=v"()
@@ -560,12 +1076,13 @@ define amdgpu_kernel void @k256_w8() #2568 {
 ; GFX90A: NumVgprs: 64
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 64
-define amdgpu_kernel void @k256_w8_no_agprs() #2568 {
+define amdgpu_kernel void @k256_w8_no_agprs() #2569 {
   call void @use256vgprs()
   ret void
 }
 
 attributes #2568 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" }
+attributes #2569 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" "amdgpu-no-agpr" }
 
 ; GCN-LABEL: {{^}}k256_w4:
 ; GFX90A: NumVgprs: 64
@@ -581,12 +1098,13 @@ define amdgpu_kernel void @k256_w4() #2564 {
 ; GFX90A: NumVgprs: 128
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k256_w4_no_agprs() #2564 {
+define amdgpu_kernel void @k256_w4_no_agprs() #2565 {
   call void @use256vgprs()
   ret void
 }
 
 attributes #2564 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" }
+attributes #2565 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" "amdgpu-no-agpr" }
 
 ; GCN-LABEL: {{^}}k256_w2:
 ; GFX90A: NumVgprs: 128
@@ -602,12 +1120,13 @@ define amdgpu_kernel void @k256_w2() #2562 {
 ; GFX90A: NumVgprs: 256
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k256_w2_no_agprs() #2562 {
+define amdgpu_kernel void @k256_w2_no_agprs() #2563 {
   call void @use256vgprs()
   ret void
 }
 
 attributes #2562 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" }
+attributes #2563 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
 
 ; GCN-LABEL: {{^}}k256_w1:
 ; GFX90A: NumVgprs: 256
@@ -634,7 +1153,7 @@ attributes #2561 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-wa
 ; GFX90A: NumVgprs: 256
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k512_no_agprs() #512 {
+define amdgpu_kernel void @k512_no_agprs() #513 {
   call void @use256vgprs()
   ret void
 }
@@ -677,13 +1196,39 @@ define void @f512() #512 {
   ret void
 }
 
+; GCN-LABEL: {{^}}f512_no_agpr:
+; GFX90A: NumVgprs: 256
+; GFX90A: NumAgprs: 0
+define void @f512_no_agpr() #513 {
+  call void @use256vgprs_no_agpr()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f512_no_agpr_ub:
+; GFX90A: NumVgprs: 256
+; GFX90A: NumAgprs: 0
+define void @f512_no_agpr_ub() #513 {
+  call void @use256vgprs()
+  ret void
+}
+
 attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
+attributes #513 = { nounwind "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" }
 
 ; GCN-LABEL: {{^}}k1024:
+; GFX90A: NumVgprs: 64
+; GFX90A: NumAgprs: 64
+; GFX90A: TotalNumVgprs: 128
+define amdgpu_kernel void @k1024() #1024 {
+  call void @use256vgprs()
+  ret void
+}
+
+; GCN-LABEL: {{^}}k1024_no_agprs:
 ; GFX90A: NumVgprs: 128
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k1024() #1024 {
+define amdgpu_kernel void @k1024_no_agprs() #1025 {
   call void @use256vgprs()
   ret void
 }
@@ -698,4 +1243,61 @@ define amdgpu_kernel void @k1024_call() #1024 {
   ret void
 }
 
+; GCN-LABEL: {{^}}k1024_call_no_agprs:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define amdgpu_kernel void @k1024_call_no_agprs() #1025 {
+  call void @use256vgprs()
+  ret void
+}
+
+; @foo uses an AGPR, so amdgpu-no-agpr on this kernel is undefined behavior
+; GCN-LABEL: {{^}}k1024_call_no_agprs_ub_callee:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 1
+; GFX90A: TotalNumVgprs: 129
+define amdgpu_kernel void @k1024_call_no_agprs_ub_callee() #1025 {
+  call void @foo()
+  call void @use256vgprs()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f1024_0:
+; GFX90A: NumVgprs: 32
+; GFX90A: NumAgprs: 1
+; GFX90A: TotalNumVgprs: 33
+define void @f1024_0() #1024 {
+  call void @foo()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f1024_1:
+; GFX90A: NumVgprs: 64
+; GFX90A: NumAgprs: 32
+; GFX90A: TotalNumVgprs: 96
+define void @f1024_1() #1024 {
+  call void @use256vgprs()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f1024_call_no_agprs:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define void @f1024_call_no_agprs() #1025 {
+  call void @use256vgprs_no_agpr()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f1024_call_no_agprs_ub:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define void @f1024_call_no_agprs_ub() #1025 {
+  call void @use256vgprs()
+  ret void
+}
+
 attributes #1024 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" }
+attributes #1025 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-no-agpr" }


        


More information about the llvm-commits mailing list