[llvm] AMDGPU: Respect amdgpu-no-agpr in functions and with calls (PR #128147)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 21 00:01:26 PST 2025


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/128147

Remove the MIR scan to detect whether AGPRs are used or not,
and the special case for callable functions. This behavior was
confusing, and not overridable. The amdgpu-no-agpr attribute was
intended to avoid this imprecise heuristic for how many AGPRs to
allocate. It was also too confusing to make this interact with
the pending amdgpu-num-agpr replacement for amdgpu-no-agpr.

Also adds an xfail-ish test where the register allocator asserts
after allocation fails which I ran into.

Future work should reintroduce a more refined MIR scan to estimate
AGPR pressure for how to split AGPRs and VGPRs.

>From 167fd5a33f8ea0a162e31d33add76b202abcb43a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 21 Feb 2025 13:34:22 +0700
Subject: [PATCH] AMDGPU: Respect amdgpu-no-agpr in functions and with calls

Remove the MIR scan to detect whether AGPRs are used or not,
and the special case for callable functions. This behavior was
confusing, and not overridable. The amdgpu-no-agpr attribute was
intended to avoid this imprecise heuristic for how many AGPRs to
allocate. It was also too confusing to make this interact with
the pending amdgpu-num-agpr replacement for amdgpu-no-agpr.

Also adds an xfail-ish test where the register allocator asserts
after allocation fails which I ran into.

Future work should reintroduce a more refined MIR scan to estimate
AGPR pressure for how to split AGPRs and VGPRs.
---
 .../Target/AMDGPU/SIMachineFunctionInfo.cpp   |  54 +-
 .../lib/Target/AMDGPU/SIMachineFunctionInfo.h |   5 -
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |   2 +-
 ...isters-assertion-after-ra-failure.xfail.ll |  23 +
 .../AMDGPU/agpr-copy-no-free-registers.ll     |   5 +-
 llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll    |  16 +-
 .../CodeGen/AMDGPU/spill-regpressure-less.mir |   2 +-
 .../CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll  | 612 +++++++++++++++++-
 8 files changed, 655 insertions(+), 64 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll

diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c5efb89d8b2db..a83fc2d188de2 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -64,6 +64,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
   }
 
   MayNeedAGPRs = ST.hasMAIInsts();
+  if (ST.hasGFX90AInsts() &&
+      ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
+      !mayUseAGPRs(F))
+    MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
 
   if (AMDGPU::isChainCC(CC)) {
     // Chain functions don't receive an SP from their caller, but are free to
@@ -98,13 +102,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
       ImplicitArgPtr = true;
   } else {
     ImplicitArgPtr = false;
-    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
-                               MaxKernArgAlign);
-
-    if (ST.hasGFX90AInsts() &&
-        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
-        !mayUseAGPRs(F))
-      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
+    MaxKernArgAlign =
+        std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
   }
 
   if (!AMDGPU::isGraphics(CC) ||
@@ -783,44 +782,3 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
 bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
   return !F.hasFnAttribute("amdgpu-no-agpr");
 }
-
-bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
-  if (UsesAGPRs)
-    return *UsesAGPRs;
-
-  if (!mayNeedAGPRs()) {
-    UsesAGPRs = false;
-    return false;
-  }
-
-  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
-      MF.getFrameInfo().hasCalls()) {
-    UsesAGPRs = true;
-    return true;
-  }
-
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
-    const Register Reg = Register::index2VirtReg(I);
-    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
-    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
-      UsesAGPRs = true;
-      return true;
-    }
-    if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
-      // Defer caching UsesAGPRs, function might not yet been regbank selected.
-      return true;
-    }
-  }
-
-  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
-    if (MRI.isPhysRegUsed(Reg)) {
-      UsesAGPRs = true;
-      return true;
-    }
-  }
-
-  UsesAGPRs = false;
-  return false;
-}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 2e2716f1ce888..740f752bc93b7 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -494,8 +494,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   // scheduler stage.
   unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
 
-  mutable std::optional<bool> UsesAGPRs;
-
   MCPhysReg getNextUserSGPR() const;
 
   MCPhysReg getNextSystemSGPR() const;
@@ -1126,9 +1124,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   // has a call which may use it.
   bool mayUseAGPRs(const Function &F) const;
 
-  // \returns true if a function needs or may need AGPRs.
-  bool usesAGPRs(const MachineFunction &MF) const;
-
   /// \returns Default/requested number of work groups for this function.
   SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
 
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 71c720ed09b5f..924aa45559366 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -585,7 +585,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
   // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
   //       register file accordingly.
   if (ST.hasGFX90AInsts()) {
-    if (MFI->usesAGPRs(MF)) {
+    if (MFI->mayNeedAGPRs()) {
       MaxNumVGPRs /= 2;
       MaxNumAGPRs = MaxNumVGPRs;
     } else {
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
new file mode 100644
index 0000000000000..feae79b377174
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CRASH %s
+
+; CRASH: error: <unknown>:0:0: no registers from class available to allocate in function 'no_free_vgprs_at_agpr_to_agpr_copy'
+; CRASH: Assertion failed: (valid() && "Cannot access invalid iterator")
+
+define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
+  %asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1", "=${v[0:31]},=${a[0:15]}"()
+  %vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0
+  %agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1
+  %mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0)
+  %agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0)
+  %agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma)
+  call void asm sideeffect "; use $0 $1", "{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0)
+  ret void
+}
+
+declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
+declare noundef i32 @llvm.amdgcn.workitem.id.x() #2
+
+attributes #0 = { "amdgpu-no-agpr" "amdgpu-waves-per-eu"="6,6" }
+attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 4ce46bbaf45ac..d1b01eeee11a4 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -240,7 +240,7 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
 }
 
 ; Check that we do make use of v32 if there are no AGPRs present in the function
-define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #5 {
 ; GFX908-LABEL: no_agpr_no_reserve:
 ; GFX908:       ; %bb.0:
 ; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
@@ -1144,5 +1144,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
 attributes #0 = { "amdgpu-waves-per-eu"="6,6" }
 attributes #1 = { convergent nounwind readnone willreturn }
 attributes #2 = { nounwind readnone willreturn }
-attributes #3 = { "amdgpu-waves-per-eu"="7,7" }
+attributes #3 = { "amdgpu-waves-per-eu"="7,7" "amdgpu-no-agpr" }
 attributes #4 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-flat-work-group-size"="1024,1024" }
+attributes #5 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-no-agpr" }
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 322686b0144a0..f6f78f134fc1f 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -94,9 +94,20 @@ bb3:
   ret void
 }
 
-; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_noagpr:
+; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
+; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
+define void @test_mfma_f32_32x32x1f32_nonentry_noagpr(ptr addrspace(1) %arg) #0 {
+bb:
+  %in.1 = load <32 x float>, ptr addrspace(1) %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
+  store <32 x float> %mai.1, ptr addrspace(1) %arg
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_with_agpr:
 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) #0 {
+define void @test_mfma_f32_32x32x1f32_nonentry_with_agpr(ptr addrspace(1) %arg) #3 {
 bb:
   %in.1 = load <32 x float>, ptr addrspace(1) %arg
   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -109,3 +120,4 @@ declare void @foo()
 attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
 attributes #1 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
 attributes #2 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" }
+attributes #3 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
index ed57caadea5c5..583d1f9e4a9fd 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
@@ -6,7 +6,7 @@
     ret void
   }
 
-  attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+  attributes #0 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-no-agpr" }
 ...
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
index 040799435db4a..03978e68e81b4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
@@ -518,6 +518,522 @@ define internal void @use256vgprs() {
   ret void
 }
 
+define internal void @use256vgprs_no_agpr() "amdgpu-no-agpr" {
+  %v0 = call i32 asm sideeffect "; def $0", "=v"()
+  %v1 = call i32 asm sideeffect "; def $0", "=v"()
+  %v2 = call i32 asm sideeffect "; def $0", "=v"()
+  %v3 = call i32 asm sideeffect "; def $0", "=v"()
+  %v4 = call i32 asm sideeffect "; def $0", "=v"()
+  %v5 = call i32 asm sideeffect "; def $0", "=v"()
+  %v6 = call i32 asm sideeffect "; def $0", "=v"()
+  %v7 = call i32 asm sideeffect "; def $0", "=v"()
+  %v8 = call i32 asm sideeffect "; def $0", "=v"()
+  %v9 = call i32 asm sideeffect "; def $0", "=v"()
+  %v10 = call i32 asm sideeffect "; def $0", "=v"()
+  %v11 = call i32 asm sideeffect "; def $0", "=v"()
+  %v12 = call i32 asm sideeffect "; def $0", "=v"()
+  %v13 = call i32 asm sideeffect "; def $0", "=v"()
+  %v14 = call i32 asm sideeffect "; def $0", "=v"()
+  %v15 = call i32 asm sideeffect "; def $0", "=v"()
+  %v16 = call i32 asm sideeffect "; def $0", "=v"()
+  %v17 = call i32 asm sideeffect "; def $0", "=v"()
+  %v18 = call i32 asm sideeffect "; def $0", "=v"()
+  %v19 = call i32 asm sideeffect "; def $0", "=v"()
+  %v20 = call i32 asm sideeffect "; def $0", "=v"()
+  %v21 = call i32 asm sideeffect "; def $0", "=v"()
+  %v22 = call i32 asm sideeffect "; def $0", "=v"()
+  %v23 = call i32 asm sideeffect "; def $0", "=v"()
+  %v24 = call i32 asm sideeffect "; def $0", "=v"()
+  %v25 = call i32 asm sideeffect "; def $0", "=v"()
+  %v26 = call i32 asm sideeffect "; def $0", "=v"()
+  %v27 = call i32 asm sideeffect "; def $0", "=v"()
+  %v28 = call i32 asm sideeffect "; def $0", "=v"()
+  %v29 = call i32 asm sideeffect "; def $0", "=v"()
+  %v30 = call i32 asm sideeffect "; def $0", "=v"()
+  %v31 = call i32 asm sideeffect "; def $0", "=v"()
+  %v32 = call i32 asm sideeffect "; def $0", "=v"()
+  %v33 = call i32 asm sideeffect "; def $0", "=v"()
+  %v34 = call i32 asm sideeffect "; def $0", "=v"()
+  %v35 = call i32 asm sideeffect "; def $0", "=v"()
+  %v36 = call i32 asm sideeffect "; def $0", "=v"()
+  %v37 = call i32 asm sideeffect "; def $0", "=v"()
+  %v38 = call i32 asm sideeffect "; def $0", "=v"()
+  %v39 = call i32 asm sideeffect "; def $0", "=v"()
+  %v40 = call i32 asm sideeffect "; def $0", "=v"()
+  %v41 = call i32 asm sideeffect "; def $0", "=v"()
+  %v42 = call i32 asm sideeffect "; def $0", "=v"()
+  %v43 = call i32 asm sideeffect "; def $0", "=v"()
+  %v44 = call i32 asm sideeffect "; def $0", "=v"()
+  %v45 = call i32 asm sideeffect "; def $0", "=v"()
+  %v46 = call i32 asm sideeffect "; def $0", "=v"()
+  %v47 = call i32 asm sideeffect "; def $0", "=v"()
+  %v48 = call i32 asm sideeffect "; def $0", "=v"()
+  %v49 = call i32 asm sideeffect "; def $0", "=v"()
+  %v50 = call i32 asm sideeffect "; def $0", "=v"()
+  %v51 = call i32 asm sideeffect "; def $0", "=v"()
+  %v52 = call i32 asm sideeffect "; def $0", "=v"()
+  %v53 = call i32 asm sideeffect "; def $0", "=v"()
+  %v54 = call i32 asm sideeffect "; def $0", "=v"()
+  %v55 = call i32 asm sideeffect "; def $0", "=v"()
+  %v56 = call i32 asm sideeffect "; def $0", "=v"()
+  %v57 = call i32 asm sideeffect "; def $0", "=v"()
+  %v58 = call i32 asm sideeffect "; def $0", "=v"()
+  %v59 = call i32 asm sideeffect "; def $0", "=v"()
+  %v60 = call i32 asm sideeffect "; def $0", "=v"()
+  %v61 = call i32 asm sideeffect "; def $0", "=v"()
+  %v62 = call i32 asm sideeffect "; def $0", "=v"()
+  %v63 = call i32 asm sideeffect "; def $0", "=v"()
+  %v64 = call i32 asm sideeffect "; def $0", "=v"()
+  %v65 = call i32 asm sideeffect "; def $0", "=v"()
+  %v66 = call i32 asm sideeffect "; def $0", "=v"()
+  %v67 = call i32 asm sideeffect "; def $0", "=v"()
+  %v68 = call i32 asm sideeffect "; def $0", "=v"()
+  %v69 = call i32 asm sideeffect "; def $0", "=v"()
+  %v70 = call i32 asm sideeffect "; def $0", "=v"()
+  %v71 = call i32 asm sideeffect "; def $0", "=v"()
+  %v72 = call i32 asm sideeffect "; def $0", "=v"()
+  %v73 = call i32 asm sideeffect "; def $0", "=v"()
+  %v74 = call i32 asm sideeffect "; def $0", "=v"()
+  %v75 = call i32 asm sideeffect "; def $0", "=v"()
+  %v76 = call i32 asm sideeffect "; def $0", "=v"()
+  %v77 = call i32 asm sideeffect "; def $0", "=v"()
+  %v78 = call i32 asm sideeffect "; def $0", "=v"()
+  %v79 = call i32 asm sideeffect "; def $0", "=v"()
+  %v80 = call i32 asm sideeffect "; def $0", "=v"()
+  %v81 = call i32 asm sideeffect "; def $0", "=v"()
+  %v82 = call i32 asm sideeffect "; def $0", "=v"()
+  %v83 = call i32 asm sideeffect "; def $0", "=v"()
+  %v84 = call i32 asm sideeffect "; def $0", "=v"()
+  %v85 = call i32 asm sideeffect "; def $0", "=v"()
+  %v86 = call i32 asm sideeffect "; def $0", "=v"()
+  %v87 = call i32 asm sideeffect "; def $0", "=v"()
+  %v88 = call i32 asm sideeffect "; def $0", "=v"()
+  %v89 = call i32 asm sideeffect "; def $0", "=v"()
+  %v90 = call i32 asm sideeffect "; def $0", "=v"()
+  %v91 = call i32 asm sideeffect "; def $0", "=v"()
+  %v92 = call i32 asm sideeffect "; def $0", "=v"()
+  %v93 = call i32 asm sideeffect "; def $0", "=v"()
+  %v94 = call i32 asm sideeffect "; def $0", "=v"()
+  %v95 = call i32 asm sideeffect "; def $0", "=v"()
+  %v96 = call i32 asm sideeffect "; def $0", "=v"()
+  %v97 = call i32 asm sideeffect "; def $0", "=v"()
+  %v98 = call i32 asm sideeffect "; def $0", "=v"()
+  %v99 = call i32 asm sideeffect "; def $0", "=v"()
+  %v100 = call i32 asm sideeffect "; def $0", "=v"()
+  %v101 = call i32 asm sideeffect "; def $0", "=v"()
+  %v102 = call i32 asm sideeffect "; def $0", "=v"()
+  %v103 = call i32 asm sideeffect "; def $0", "=v"()
+  %v104 = call i32 asm sideeffect "; def $0", "=v"()
+  %v105 = call i32 asm sideeffect "; def $0", "=v"()
+  %v106 = call i32 asm sideeffect "; def $0", "=v"()
+  %v107 = call i32 asm sideeffect "; def $0", "=v"()
+  %v108 = call i32 asm sideeffect "; def $0", "=v"()
+  %v109 = call i32 asm sideeffect "; def $0", "=v"()
+  %v110 = call i32 asm sideeffect "; def $0", "=v"()
+  %v111 = call i32 asm sideeffect "; def $0", "=v"()
+  %v112 = call i32 asm sideeffect "; def $0", "=v"()
+  %v113 = call i32 asm sideeffect "; def $0", "=v"()
+  %v114 = call i32 asm sideeffect "; def $0", "=v"()
+  %v115 = call i32 asm sideeffect "; def $0", "=v"()
+  %v116 = call i32 asm sideeffect "; def $0", "=v"()
+  %v117 = call i32 asm sideeffect "; def $0", "=v"()
+  %v118 = call i32 asm sideeffect "; def $0", "=v"()
+  %v119 = call i32 asm sideeffect "; def $0", "=v"()
+  %v120 = call i32 asm sideeffect "; def $0", "=v"()
+  %v121 = call i32 asm sideeffect "; def $0", "=v"()
+  %v122 = call i32 asm sideeffect "; def $0", "=v"()
+  %v123 = call i32 asm sideeffect "; def $0", "=v"()
+  %v124 = call i32 asm sideeffect "; def $0", "=v"()
+  %v125 = call i32 asm sideeffect "; def $0", "=v"()
+  %v126 = call i32 asm sideeffect "; def $0", "=v"()
+  %v127 = call i32 asm sideeffect "; def $0", "=v"()
+  %v128 = call i32 asm sideeffect "; def $0", "=v"()
+  %v129 = call i32 asm sideeffect "; def $0", "=v"()
+  %v130 = call i32 asm sideeffect "; def $0", "=v"()
+  %v131 = call i32 asm sideeffect "; def $0", "=v"()
+  %v132 = call i32 asm sideeffect "; def $0", "=v"()
+  %v133 = call i32 asm sideeffect "; def $0", "=v"()
+  %v134 = call i32 asm sideeffect "; def $0", "=v"()
+  %v135 = call i32 asm sideeffect "; def $0", "=v"()
+  %v136 = call i32 asm sideeffect "; def $0", "=v"()
+  %v137 = call i32 asm sideeffect "; def $0", "=v"()
+  %v138 = call i32 asm sideeffect "; def $0", "=v"()
+  %v139 = call i32 asm sideeffect "; def $0", "=v"()
+  %v140 = call i32 asm sideeffect "; def $0", "=v"()
+  %v141 = call i32 asm sideeffect "; def $0", "=v"()
+  %v142 = call i32 asm sideeffect "; def $0", "=v"()
+  %v143 = call i32 asm sideeffect "; def $0", "=v"()
+  %v144 = call i32 asm sideeffect "; def $0", "=v"()
+  %v145 = call i32 asm sideeffect "; def $0", "=v"()
+  %v146 = call i32 asm sideeffect "; def $0", "=v"()
+  %v147 = call i32 asm sideeffect "; def $0", "=v"()
+  %v148 = call i32 asm sideeffect "; def $0", "=v"()
+  %v149 = call i32 asm sideeffect "; def $0", "=v"()
+  %v150 = call i32 asm sideeffect "; def $0", "=v"()
+  %v151 = call i32 asm sideeffect "; def $0", "=v"()
+  %v152 = call i32 asm sideeffect "; def $0", "=v"()
+  %v153 = call i32 asm sideeffect "; def $0", "=v"()
+  %v154 = call i32 asm sideeffect "; def $0", "=v"()
+  %v155 = call i32 asm sideeffect "; def $0", "=v"()
+  %v156 = call i32 asm sideeffect "; def $0", "=v"()
+  %v157 = call i32 asm sideeffect "; def $0", "=v"()
+  %v158 = call i32 asm sideeffect "; def $0", "=v"()
+  %v159 = call i32 asm sideeffect "; def $0", "=v"()
+  %v160 = call i32 asm sideeffect "; def $0", "=v"()
+  %v161 = call i32 asm sideeffect "; def $0", "=v"()
+  %v162 = call i32 asm sideeffect "; def $0", "=v"()
+  %v163 = call i32 asm sideeffect "; def $0", "=v"()
+  %v164 = call i32 asm sideeffect "; def $0", "=v"()
+  %v165 = call i32 asm sideeffect "; def $0", "=v"()
+  %v166 = call i32 asm sideeffect "; def $0", "=v"()
+  %v167 = call i32 asm sideeffect "; def $0", "=v"()
+  %v168 = call i32 asm sideeffect "; def $0", "=v"()
+  %v169 = call i32 asm sideeffect "; def $0", "=v"()
+  %v170 = call i32 asm sideeffect "; def $0", "=v"()
+  %v171 = call i32 asm sideeffect "; def $0", "=v"()
+  %v172 = call i32 asm sideeffect "; def $0", "=v"()
+  %v173 = call i32 asm sideeffect "; def $0", "=v"()
+  %v174 = call i32 asm sideeffect "; def $0", "=v"()
+  %v175 = call i32 asm sideeffect "; def $0", "=v"()
+  %v176 = call i32 asm sideeffect "; def $0", "=v"()
+  %v177 = call i32 asm sideeffect "; def $0", "=v"()
+  %v178 = call i32 asm sideeffect "; def $0", "=v"()
+  %v179 = call i32 asm sideeffect "; def $0", "=v"()
+  %v180 = call i32 asm sideeffect "; def $0", "=v"()
+  %v181 = call i32 asm sideeffect "; def $0", "=v"()
+  %v182 = call i32 asm sideeffect "; def $0", "=v"()
+  %v183 = call i32 asm sideeffect "; def $0", "=v"()
+  %v184 = call i32 asm sideeffect "; def $0", "=v"()
+  %v185 = call i32 asm sideeffect "; def $0", "=v"()
+  %v186 = call i32 asm sideeffect "; def $0", "=v"()
+  %v187 = call i32 asm sideeffect "; def $0", "=v"()
+  %v188 = call i32 asm sideeffect "; def $0", "=v"()
+  %v189 = call i32 asm sideeffect "; def $0", "=v"()
+  %v190 = call i32 asm sideeffect "; def $0", "=v"()
+  %v191 = call i32 asm sideeffect "; def $0", "=v"()
+  %v192 = call i32 asm sideeffect "; def $0", "=v"()
+  %v193 = call i32 asm sideeffect "; def $0", "=v"()
+  %v194 = call i32 asm sideeffect "; def $0", "=v"()
+  %v195 = call i32 asm sideeffect "; def $0", "=v"()
+  %v196 = call i32 asm sideeffect "; def $0", "=v"()
+  %v197 = call i32 asm sideeffect "; def $0", "=v"()
+  %v198 = call i32 asm sideeffect "; def $0", "=v"()
+  %v199 = call i32 asm sideeffect "; def $0", "=v"()
+  %v200 = call i32 asm sideeffect "; def $0", "=v"()
+  %v201 = call i32 asm sideeffect "; def $0", "=v"()
+  %v202 = call i32 asm sideeffect "; def $0", "=v"()
+  %v203 = call i32 asm sideeffect "; def $0", "=v"()
+  %v204 = call i32 asm sideeffect "; def $0", "=v"()
+  %v205 = call i32 asm sideeffect "; def $0", "=v"()
+  %v206 = call i32 asm sideeffect "; def $0", "=v"()
+  %v207 = call i32 asm sideeffect "; def $0", "=v"()
+  %v208 = call i32 asm sideeffect "; def $0", "=v"()
+  %v209 = call i32 asm sideeffect "; def $0", "=v"()
+  %v210 = call i32 asm sideeffect "; def $0", "=v"()
+  %v211 = call i32 asm sideeffect "; def $0", "=v"()
+  %v212 = call i32 asm sideeffect "; def $0", "=v"()
+  %v213 = call i32 asm sideeffect "; def $0", "=v"()
+  %v214 = call i32 asm sideeffect "; def $0", "=v"()
+  %v215 = call i32 asm sideeffect "; def $0", "=v"()
+  %v216 = call i32 asm sideeffect "; def $0", "=v"()
+  %v217 = call i32 asm sideeffect "; def $0", "=v"()
+  %v218 = call i32 asm sideeffect "; def $0", "=v"()
+  %v219 = call i32 asm sideeffect "; def $0", "=v"()
+  %v220 = call i32 asm sideeffect "; def $0", "=v"()
+  %v221 = call i32 asm sideeffect "; def $0", "=v"()
+  %v222 = call i32 asm sideeffect "; def $0", "=v"()
+  %v223 = call i32 asm sideeffect "; def $0", "=v"()
+  %v224 = call i32 asm sideeffect "; def $0", "=v"()
+  %v225 = call i32 asm sideeffect "; def $0", "=v"()
+  %v226 = call i32 asm sideeffect "; def $0", "=v"()
+  %v227 = call i32 asm sideeffect "; def $0", "=v"()
+  %v228 = call i32 asm sideeffect "; def $0", "=v"()
+  %v229 = call i32 asm sideeffect "; def $0", "=v"()
+  %v230 = call i32 asm sideeffect "; def $0", "=v"()
+  %v231 = call i32 asm sideeffect "; def $0", "=v"()
+  %v232 = call i32 asm sideeffect "; def $0", "=v"()
+  %v233 = call i32 asm sideeffect "; def $0", "=v"()
+  %v234 = call i32 asm sideeffect "; def $0", "=v"()
+  %v235 = call i32 asm sideeffect "; def $0", "=v"()
+  %v236 = call i32 asm sideeffect "; def $0", "=v"()
+  %v237 = call i32 asm sideeffect "; def $0", "=v"()
+  %v238 = call i32 asm sideeffect "; def $0", "=v"()
+  %v239 = call i32 asm sideeffect "; def $0", "=v"()
+  %v240 = call i32 asm sideeffect "; def $0", "=v"()
+  %v241 = call i32 asm sideeffect "; def $0", "=v"()
+  %v242 = call i32 asm sideeffect "; def $0", "=v"()
+  %v243 = call i32 asm sideeffect "; def $0", "=v"()
+  %v244 = call i32 asm sideeffect "; def $0", "=v"()
+  %v245 = call i32 asm sideeffect "; def $0", "=v"()
+  %v246 = call i32 asm sideeffect "; def $0", "=v"()
+  %v247 = call i32 asm sideeffect "; def $0", "=v"()
+  %v248 = call i32 asm sideeffect "; def $0", "=v"()
+  %v249 = call i32 asm sideeffect "; def $0", "=v"()
+  %v250 = call i32 asm sideeffect "; def $0", "=v"()
+  %v251 = call i32 asm sideeffect "; def $0", "=v"()
+  %v252 = call i32 asm sideeffect "; def $0", "=v"()
+  %v253 = call i32 asm sideeffect "; def $0", "=v"()
+  %v254 = call i32 asm sideeffect "; def $0", "=v"()
+  %v255 = call i32 asm sideeffect "; def $0", "=v"()
+  call void asm sideeffect "; use $0", "v"(i32 %v0)
+  call void asm sideeffect "; use $0", "v"(i32 %v1)
+  call void asm sideeffect "; use $0", "v"(i32 %v2)
+  call void asm sideeffect "; use $0", "v"(i32 %v3)
+  call void asm sideeffect "; use $0", "v"(i32 %v4)
+  call void asm sideeffect "; use $0", "v"(i32 %v5)
+  call void asm sideeffect "; use $0", "v"(i32 %v6)
+  call void asm sideeffect "; use $0", "v"(i32 %v7)
+  call void asm sideeffect "; use $0", "v"(i32 %v8)
+  call void asm sideeffect "; use $0", "v"(i32 %v9)
+  call void asm sideeffect "; use $0", "v"(i32 %v10)
+  call void asm sideeffect "; use $0", "v"(i32 %v11)
+  call void asm sideeffect "; use $0", "v"(i32 %v12)
+  call void asm sideeffect "; use $0", "v"(i32 %v13)
+  call void asm sideeffect "; use $0", "v"(i32 %v14)
+  call void asm sideeffect "; use $0", "v"(i32 %v15)
+  call void asm sideeffect "; use $0", "v"(i32 %v16)
+  call void asm sideeffect "; use $0", "v"(i32 %v17)
+  call void asm sideeffect "; use $0", "v"(i32 %v18)
+  call void asm sideeffect "; use $0", "v"(i32 %v19)
+  call void asm sideeffect "; use $0", "v"(i32 %v20)
+  call void asm sideeffect "; use $0", "v"(i32 %v21)
+  call void asm sideeffect "; use $0", "v"(i32 %v22)
+  call void asm sideeffect "; use $0", "v"(i32 %v23)
+  call void asm sideeffect "; use $0", "v"(i32 %v24)
+  call void asm sideeffect "; use $0", "v"(i32 %v25)
+  call void asm sideeffect "; use $0", "v"(i32 %v26)
+  call void asm sideeffect "; use $0", "v"(i32 %v27)
+  call void asm sideeffect "; use $0", "v"(i32 %v28)
+  call void asm sideeffect "; use $0", "v"(i32 %v29)
+  call void asm sideeffect "; use $0", "v"(i32 %v30)
+  call void asm sideeffect "; use $0", "v"(i32 %v31)
+  call void asm sideeffect "; use $0", "v"(i32 %v32)
+  call void asm sideeffect "; use $0", "v"(i32 %v33)
+  call void asm sideeffect "; use $0", "v"(i32 %v34)
+  call void asm sideeffect "; use $0", "v"(i32 %v35)
+  call void asm sideeffect "; use $0", "v"(i32 %v36)
+  call void asm sideeffect "; use $0", "v"(i32 %v37)
+  call void asm sideeffect "; use $0", "v"(i32 %v38)
+  call void asm sideeffect "; use $0", "v"(i32 %v39)
+  call void asm sideeffect "; use $0", "v"(i32 %v40)
+  call void asm sideeffect "; use $0", "v"(i32 %v41)
+  call void asm sideeffect "; use $0", "v"(i32 %v42)
+  call void asm sideeffect "; use $0", "v"(i32 %v43)
+  call void asm sideeffect "; use $0", "v"(i32 %v44)
+  call void asm sideeffect "; use $0", "v"(i32 %v45)
+  call void asm sideeffect "; use $0", "v"(i32 %v46)
+  call void asm sideeffect "; use $0", "v"(i32 %v47)
+  call void asm sideeffect "; use $0", "v"(i32 %v48)
+  call void asm sideeffect "; use $0", "v"(i32 %v49)
+  call void asm sideeffect "; use $0", "v"(i32 %v50)
+  call void asm sideeffect "; use $0", "v"(i32 %v51)
+  call void asm sideeffect "; use $0", "v"(i32 %v52)
+  call void asm sideeffect "; use $0", "v"(i32 %v53)
+  call void asm sideeffect "; use $0", "v"(i32 %v54)
+  call void asm sideeffect "; use $0", "v"(i32 %v55)
+  call void asm sideeffect "; use $0", "v"(i32 %v56)
+  call void asm sideeffect "; use $0", "v"(i32 %v57)
+  call void asm sideeffect "; use $0", "v"(i32 %v58)
+  call void asm sideeffect "; use $0", "v"(i32 %v59)
+  call void asm sideeffect "; use $0", "v"(i32 %v60)
+  call void asm sideeffect "; use $0", "v"(i32 %v61)
+  call void asm sideeffect "; use $0", "v"(i32 %v62)
+  call void asm sideeffect "; use $0", "v"(i32 %v63)
+  call void asm sideeffect "; use $0", "v"(i32 %v64)
+  call void asm sideeffect "; use $0", "v"(i32 %v65)
+  call void asm sideeffect "; use $0", "v"(i32 %v66)
+  call void asm sideeffect "; use $0", "v"(i32 %v67)
+  call void asm sideeffect "; use $0", "v"(i32 %v68)
+  call void asm sideeffect "; use $0", "v"(i32 %v69)
+  call void asm sideeffect "; use $0", "v"(i32 %v70)
+  call void asm sideeffect "; use $0", "v"(i32 %v71)
+  call void asm sideeffect "; use $0", "v"(i32 %v72)
+  call void asm sideeffect "; use $0", "v"(i32 %v73)
+  call void asm sideeffect "; use $0", "v"(i32 %v74)
+  call void asm sideeffect "; use $0", "v"(i32 %v75)
+  call void asm sideeffect "; use $0", "v"(i32 %v76)
+  call void asm sideeffect "; use $0", "v"(i32 %v77)
+  call void asm sideeffect "; use $0", "v"(i32 %v78)
+  call void asm sideeffect "; use $0", "v"(i32 %v79)
+  call void asm sideeffect "; use $0", "v"(i32 %v80)
+  call void asm sideeffect "; use $0", "v"(i32 %v81)
+  call void asm sideeffect "; use $0", "v"(i32 %v82)
+  call void asm sideeffect "; use $0", "v"(i32 %v83)
+  call void asm sideeffect "; use $0", "v"(i32 %v84)
+  call void asm sideeffect "; use $0", "v"(i32 %v85)
+  call void asm sideeffect "; use $0", "v"(i32 %v86)
+  call void asm sideeffect "; use $0", "v"(i32 %v87)
+  call void asm sideeffect "; use $0", "v"(i32 %v88)
+  call void asm sideeffect "; use $0", "v"(i32 %v89)
+  call void asm sideeffect "; use $0", "v"(i32 %v90)
+  call void asm sideeffect "; use $0", "v"(i32 %v91)
+  call void asm sideeffect "; use $0", "v"(i32 %v92)
+  call void asm sideeffect "; use $0", "v"(i32 %v93)
+  call void asm sideeffect "; use $0", "v"(i32 %v94)
+  call void asm sideeffect "; use $0", "v"(i32 %v95)
+  call void asm sideeffect "; use $0", "v"(i32 %v96)
+  call void asm sideeffect "; use $0", "v"(i32 %v97)
+  call void asm sideeffect "; use $0", "v"(i32 %v98)
+  call void asm sideeffect "; use $0", "v"(i32 %v99)
+  call void asm sideeffect "; use $0", "v"(i32 %v100)
+  call void asm sideeffect "; use $0", "v"(i32 %v101)
+  call void asm sideeffect "; use $0", "v"(i32 %v102)
+  call void asm sideeffect "; use $0", "v"(i32 %v103)
+  call void asm sideeffect "; use $0", "v"(i32 %v104)
+  call void asm sideeffect "; use $0", "v"(i32 %v105)
+  call void asm sideeffect "; use $0", "v"(i32 %v106)
+  call void asm sideeffect "; use $0", "v"(i32 %v107)
+  call void asm sideeffect "; use $0", "v"(i32 %v108)
+  call void asm sideeffect "; use $0", "v"(i32 %v109)
+  call void asm sideeffect "; use $0", "v"(i32 %v110)
+  call void asm sideeffect "; use $0", "v"(i32 %v111)
+  call void asm sideeffect "; use $0", "v"(i32 %v112)
+  call void asm sideeffect "; use $0", "v"(i32 %v113)
+  call void asm sideeffect "; use $0", "v"(i32 %v114)
+  call void asm sideeffect "; use $0", "v"(i32 %v115)
+  call void asm sideeffect "; use $0", "v"(i32 %v116)
+  call void asm sideeffect "; use $0", "v"(i32 %v117)
+  call void asm sideeffect "; use $0", "v"(i32 %v118)
+  call void asm sideeffect "; use $0", "v"(i32 %v119)
+  call void asm sideeffect "; use $0", "v"(i32 %v120)
+  call void asm sideeffect "; use $0", "v"(i32 %v121)
+  call void asm sideeffect "; use $0", "v"(i32 %v122)
+  call void asm sideeffect "; use $0", "v"(i32 %v123)
+  call void asm sideeffect "; use $0", "v"(i32 %v124)
+  call void asm sideeffect "; use $0", "v"(i32 %v125)
+  call void asm sideeffect "; use $0", "v"(i32 %v126)
+  call void asm sideeffect "; use $0", "v"(i32 %v127)
+  call void asm sideeffect "; use $0", "v"(i32 %v128)
+  call void asm sideeffect "; use $0", "v"(i32 %v129)
+  call void asm sideeffect "; use $0", "v"(i32 %v130)
+  call void asm sideeffect "; use $0", "v"(i32 %v131)
+  call void asm sideeffect "; use $0", "v"(i32 %v132)
+  call void asm sideeffect "; use $0", "v"(i32 %v133)
+  call void asm sideeffect "; use $0", "v"(i32 %v134)
+  call void asm sideeffect "; use $0", "v"(i32 %v135)
+  call void asm sideeffect "; use $0", "v"(i32 %v136)
+  call void asm sideeffect "; use $0", "v"(i32 %v137)
+  call void asm sideeffect "; use $0", "v"(i32 %v138)
+  call void asm sideeffect "; use $0", "v"(i32 %v139)
+  call void asm sideeffect "; use $0", "v"(i32 %v140)
+  call void asm sideeffect "; use $0", "v"(i32 %v141)
+  call void asm sideeffect "; use $0", "v"(i32 %v142)
+  call void asm sideeffect "; use $0", "v"(i32 %v143)
+  call void asm sideeffect "; use $0", "v"(i32 %v144)
+  call void asm sideeffect "; use $0", "v"(i32 %v145)
+  call void asm sideeffect "; use $0", "v"(i32 %v146)
+  call void asm sideeffect "; use $0", "v"(i32 %v147)
+  call void asm sideeffect "; use $0", "v"(i32 %v148)
+  call void asm sideeffect "; use $0", "v"(i32 %v149)
+  call void asm sideeffect "; use $0", "v"(i32 %v150)
+  call void asm sideeffect "; use $0", "v"(i32 %v151)
+  call void asm sideeffect "; use $0", "v"(i32 %v152)
+  call void asm sideeffect "; use $0", "v"(i32 %v153)
+  call void asm sideeffect "; use $0", "v"(i32 %v154)
+  call void asm sideeffect "; use $0", "v"(i32 %v155)
+  call void asm sideeffect "; use $0", "v"(i32 %v156)
+  call void asm sideeffect "; use $0", "v"(i32 %v157)
+  call void asm sideeffect "; use $0", "v"(i32 %v158)
+  call void asm sideeffect "; use $0", "v"(i32 %v159)
+  call void asm sideeffect "; use $0", "v"(i32 %v160)
+  call void asm sideeffect "; use $0", "v"(i32 %v161)
+  call void asm sideeffect "; use $0", "v"(i32 %v162)
+  call void asm sideeffect "; use $0", "v"(i32 %v163)
+  call void asm sideeffect "; use $0", "v"(i32 %v164)
+  call void asm sideeffect "; use $0", "v"(i32 %v165)
+  call void asm sideeffect "; use $0", "v"(i32 %v166)
+  call void asm sideeffect "; use $0", "v"(i32 %v167)
+  call void asm sideeffect "; use $0", "v"(i32 %v168)
+  call void asm sideeffect "; use $0", "v"(i32 %v169)
+  call void asm sideeffect "; use $0", "v"(i32 %v170)
+  call void asm sideeffect "; use $0", "v"(i32 %v171)
+  call void asm sideeffect "; use $0", "v"(i32 %v172)
+  call void asm sideeffect "; use $0", "v"(i32 %v173)
+  call void asm sideeffect "; use $0", "v"(i32 %v174)
+  call void asm sideeffect "; use $0", "v"(i32 %v175)
+  call void asm sideeffect "; use $0", "v"(i32 %v176)
+  call void asm sideeffect "; use $0", "v"(i32 %v177)
+  call void asm sideeffect "; use $0", "v"(i32 %v178)
+  call void asm sideeffect "; use $0", "v"(i32 %v179)
+  call void asm sideeffect "; use $0", "v"(i32 %v180)
+  call void asm sideeffect "; use $0", "v"(i32 %v181)
+  call void asm sideeffect "; use $0", "v"(i32 %v182)
+  call void asm sideeffect "; use $0", "v"(i32 %v183)
+  call void asm sideeffect "; use $0", "v"(i32 %v184)
+  call void asm sideeffect "; use $0", "v"(i32 %v185)
+  call void asm sideeffect "; use $0", "v"(i32 %v186)
+  call void asm sideeffect "; use $0", "v"(i32 %v187)
+  call void asm sideeffect "; use $0", "v"(i32 %v188)
+  call void asm sideeffect "; use $0", "v"(i32 %v189)
+  call void asm sideeffect "; use $0", "v"(i32 %v190)
+  call void asm sideeffect "; use $0", "v"(i32 %v191)
+  call void asm sideeffect "; use $0", "v"(i32 %v192)
+  call void asm sideeffect "; use $0", "v"(i32 %v193)
+  call void asm sideeffect "; use $0", "v"(i32 %v194)
+  call void asm sideeffect "; use $0", "v"(i32 %v195)
+  call void asm sideeffect "; use $0", "v"(i32 %v196)
+  call void asm sideeffect "; use $0", "v"(i32 %v197)
+  call void asm sideeffect "; use $0", "v"(i32 %v198)
+  call void asm sideeffect "; use $0", "v"(i32 %v199)
+  call void asm sideeffect "; use $0", "v"(i32 %v200)
+  call void asm sideeffect "; use $0", "v"(i32 %v201)
+  call void asm sideeffect "; use $0", "v"(i32 %v202)
+  call void asm sideeffect "; use $0", "v"(i32 %v203)
+  call void asm sideeffect "; use $0", "v"(i32 %v204)
+  call void asm sideeffect "; use $0", "v"(i32 %v205)
+  call void asm sideeffect "; use $0", "v"(i32 %v206)
+  call void asm sideeffect "; use $0", "v"(i32 %v207)
+  call void asm sideeffect "; use $0", "v"(i32 %v208)
+  call void asm sideeffect "; use $0", "v"(i32 %v209)
+  call void asm sideeffect "; use $0", "v"(i32 %v210)
+  call void asm sideeffect "; use $0", "v"(i32 %v211)
+  call void asm sideeffect "; use $0", "v"(i32 %v212)
+  call void asm sideeffect "; use $0", "v"(i32 %v213)
+  call void asm sideeffect "; use $0", "v"(i32 %v214)
+  call void asm sideeffect "; use $0", "v"(i32 %v215)
+  call void asm sideeffect "; use $0", "v"(i32 %v216)
+  call void asm sideeffect "; use $0", "v"(i32 %v217)
+  call void asm sideeffect "; use $0", "v"(i32 %v218)
+  call void asm sideeffect "; use $0", "v"(i32 %v219)
+  call void asm sideeffect "; use $0", "v"(i32 %v220)
+  call void asm sideeffect "; use $0", "v"(i32 %v221)
+  call void asm sideeffect "; use $0", "v"(i32 %v222)
+  call void asm sideeffect "; use $0", "v"(i32 %v223)
+  call void asm sideeffect "; use $0", "v"(i32 %v224)
+  call void asm sideeffect "; use $0", "v"(i32 %v225)
+  call void asm sideeffect "; use $0", "v"(i32 %v226)
+  call void asm sideeffect "; use $0", "v"(i32 %v227)
+  call void asm sideeffect "; use $0", "v"(i32 %v228)
+  call void asm sideeffect "; use $0", "v"(i32 %v229)
+  call void asm sideeffect "; use $0", "v"(i32 %v230)
+  call void asm sideeffect "; use $0", "v"(i32 %v231)
+  call void asm sideeffect "; use $0", "v"(i32 %v232)
+  call void asm sideeffect "; use $0", "v"(i32 %v233)
+  call void asm sideeffect "; use $0", "v"(i32 %v234)
+  call void asm sideeffect "; use $0", "v"(i32 %v235)
+  call void asm sideeffect "; use $0", "v"(i32 %v236)
+  call void asm sideeffect "; use $0", "v"(i32 %v237)
+  call void asm sideeffect "; use $0", "v"(i32 %v238)
+  call void asm sideeffect "; use $0", "v"(i32 %v239)
+  call void asm sideeffect "; use $0", "v"(i32 %v240)
+  call void asm sideeffect "; use $0", "v"(i32 %v241)
+  call void asm sideeffect "; use $0", "v"(i32 %v242)
+  call void asm sideeffect "; use $0", "v"(i32 %v243)
+  call void asm sideeffect "; use $0", "v"(i32 %v244)
+  call void asm sideeffect "; use $0", "v"(i32 %v245)
+  call void asm sideeffect "; use $0", "v"(i32 %v246)
+  call void asm sideeffect "; use $0", "v"(i32 %v247)
+  call void asm sideeffect "; use $0", "v"(i32 %v248)
+  call void asm sideeffect "; use $0", "v"(i32 %v249)
+  call void asm sideeffect "; use $0", "v"(i32 %v250)
+  call void asm sideeffect "; use $0", "v"(i32 %v251)
+  call void asm sideeffect "; use $0", "v"(i32 %v252)
+  call void asm sideeffect "; use $0", "v"(i32 %v253)
+  call void asm sideeffect "; use $0", "v"(i32 %v254)
+  call void asm sideeffect "; use $0", "v"(i32 %v255)
+  ret void
+}
+
 define internal void @use512vgprs() {
   %v0 = call <32 x i32> asm sideeffect "; def $0", "=v"()
   %v1 = call <32 x i32> asm sideeffect "; def $0", "=v"()
@@ -560,12 +1076,13 @@ define amdgpu_kernel void @k256_w8() #2568 {
 ; GFX90A: NumVgprs: 64
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 64
-define amdgpu_kernel void @k256_w8_no_agprs() #2568 {
+define amdgpu_kernel void @k256_w8_no_agprs() #2569 {
   call void @use256vgprs()
   ret void
 }
 
 attributes #2568 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" }
+attributes #2569 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" "amdgpu-no-agpr" }
 
 ; GCN-LABEL: {{^}}k256_w4:
 ; GFX90A: NumVgprs: 64
@@ -581,12 +1098,13 @@ define amdgpu_kernel void @k256_w4() #2564 {
 ; GFX90A: NumVgprs: 128
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k256_w4_no_agprs() #2564 {
+define amdgpu_kernel void @k256_w4_no_agprs() #2565 {
   call void @use256vgprs()
   ret void
 }
 
 attributes #2564 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" }
+attributes #2565 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" "amdgpu-no-agpr" }
 
 ; GCN-LABEL: {{^}}k256_w2:
 ; GFX90A: NumVgprs: 128
@@ -602,12 +1120,13 @@ define amdgpu_kernel void @k256_w2() #2562 {
 ; GFX90A: NumVgprs: 256
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k256_w2_no_agprs() #2562 {
+define amdgpu_kernel void @k256_w2_no_agprs() #2563 {
   call void @use256vgprs()
   ret void
 }
 
 attributes #2562 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" }
+attributes #2563 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
 
 ; GCN-LABEL: {{^}}k256_w1:
 ; GFX90A: NumVgprs: 256
@@ -634,7 +1153,7 @@ attributes #2561 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-wa
 ; GFX90A: NumVgprs: 256
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k512_no_agprs() #512 {
+define amdgpu_kernel void @k512_no_agprs() #513 {
   call void @use256vgprs()
   ret void
 }
@@ -677,13 +1196,39 @@ define void @f512() #512 {
   ret void
 }
 
+; GCN-LABEL: {{^}}f512_no_agpr:
+; GFX90A: NumVgprs: 256
+; GFX90A: NumAgprs: 0
+define void @f512_no_agpr() #513 {
+  call void @use256vgprs_no_agpr()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f512_no_agpr_ub:
+; GFX90A: NumVgprs: 256
+; GFX90A: NumAgprs: 0
+define void @f512_no_agpr_ub() #513 {
+  call void @use256vgprs()
+  ret void
+}
+
 attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
+attributes #513 = { nounwind "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" }
 
 ; GCN-LABEL: {{^}}k1024:
+; GFX90A: NumVgprs: 64
+; GFX90A: NumAgprs: 64
+; GFX90A: TotalNumVgprs: 128
+define amdgpu_kernel void @k1024() #1024 {
+  call void @use256vgprs()
+  ret void
+}
+
+; GCN-LABEL: {{^}}k1024_no_agprs:
 ; GFX90A: NumVgprs: 128
 ; GFX90A: NumAgprs: 0
 ; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k1024() #1024 {
+define amdgpu_kernel void @k1024_no_agprs() #1025 {
   call void @use256vgprs()
   ret void
 }
@@ -698,4 +1243,61 @@ define amdgpu_kernel void @k1024_call() #1024 {
   ret void
 }
 
+; GCN-LABEL: {{^}}k1024_call_no_agprs:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define amdgpu_kernel void @k1024_call_no_agprs() #1025 {
+  call void @use256vgprs()
+  ret void
+}
+
+; @foo uses an AGPRs so amdgpu-no-agpr is undefined
+; GCN-LABEL: {{^}}k1024_call_no_agprs_ub_callee:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 1
+; GFX90A: TotalNumVgprs: 129
+define amdgpu_kernel void @k1024_call_no_agprs_ub_callee() #1025 {
+  call void @foo()
+  call void @use256vgprs()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f1024_0:
+; GFX90A: NumVgprs: 32
+; GFX90A: NumAgprs: 1
+; GFX90A: TotalNumVgprs: 33
+define void @f1024_0() #1024 {
+  call void @foo()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f1024_1:
+; GFX90A: NumVgprs: 64
+; GFX90A: NumAgprs: 32
+; GFX90A: TotalNumVgprs: 96
+define void @f1024_1() #1024 {
+  call void @use256vgprs()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f1024_call_no_agprs:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define void @f1024_call_no_agprs() #1025 {
+  call void @use256vgprs_no_agpr()
+  ret void
+}
+
+; GCN-LABEL: {{^}}f1024_call_no_agprs_ub:
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 0
+; GFX90A: TotalNumVgprs: 128
+define void @f1024_call_no_agprs_ub() #1025 {
+  call void @use256vgprs()
+  ret void
+}
+
 attributes #1024 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" }
+attributes #1025 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-no-agpr" }



More information about the llvm-commits mailing list