[llvm] [AMDGPU] Prefer lower total register usage in regions with spilling (PR #71882)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 9 16:36:19 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Jeffrey Byrnes (jrbyrnes)

<details>
<summary>Changes</summary>

In general, we say RP is less if it:
1. has better occupancy
2. has the same occupancy, but better tuple register pressure
3. has the same occupancy and the same tuple register pressure, but better register pressure.

This works well in the general case, but not when there is spilling. In regions with spilling, we should prefer the schedule with lower overall RP rather than giving preference to tuple RP. Otherwise, we may increase RP by hundreds of registers in order to save a few tuples.

A note on the test:
For the OccInitialSchedule stage we have
PressureAfter: VGPRs: 67 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
PressureBefore: VGPRs: 72 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 4

We have improved VGPR RP at the expense of SGPR tuple pressure: the scheduler decided to define and use the second sgpr_128 before killing the first. In this case we should not revert to the original order.

---

Patch is 20.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71882.diff


5 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp (+8-6) 
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.cpp (+20-2) 
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+3-2) 
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+1-2) 
- (added) llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir (+353) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index d89c9b1febded0f..62a03eec141671e 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -410,9 +410,11 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
 // Sort recorded regions by pressure - highest at the front
 void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
-  llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
-    return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
-  });
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  llvm::sort(
+      Regions, [&ST, TargetOcc, &MFI](const Region *R1, const Region *R2) {
+        return R2->MaxPressure.less(ST, *MFI, R1->MaxPressure, TargetOcc);
+      });
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -524,19 +526,19 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
 
   auto MaxPressure = Regions.front()->MaxPressure;
   for (auto *R : Regions) {
-    if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
+    if (!force && R->MaxPressure.less(ST, *MFI, MaxPressure, TgtOcc))
       break;
 
     BuildDAG DAG(*R, *this);
     const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
 
     const auto RP = getSchedulePressure(*R, MinSchedule);
-    LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
+    LLVM_DEBUG(if (R->MaxPressure.less(ST, *MFI, RP, TgtOcc)) {
       dbgs() << "\nWarning: Pressure becomes worse after minreg!";
       printSchedRP(dbgs(), R->MaxPressure, RP);
     });
 
-    if (!force && MaxPressure.less(ST, RP, TgtOcc))
+    if (!force && MaxPressure.less(ST, *MFI, RP, TgtOcc))
       break;
 
     scheduleRegion(*R, MinSchedule, RP);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index a04c470b7b9762f..9f6ccd3974a3842 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -13,6 +13,7 @@
 
 #include "GCNRegPressure.h"
 #include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 
 using namespace llvm;
@@ -89,7 +90,8 @@ void GCNRegPressure::inc(unsigned Reg,
 }
 
 bool GCNRegPressure::less(const GCNSubtarget &ST,
-                          const GCNRegPressure& O,
+                          const SIMachineFunctionInfo &MFI,
+                          const GCNRegPressure &O,
                           unsigned MaxOccupancy) const {
   const auto SGPROcc = std::min(MaxOccupancy,
                                 ST.getOccupancyWithNumSGPRs(getSGPRNum()));
@@ -104,6 +106,8 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
 
   const auto Occ = std::min(SGPROcc, VGPROcc);
   const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
+
+  // Give first prefernce to the better occupancy
   if (Occ != OtherOcc)
     return Occ > OtherOcc;
 
@@ -115,7 +119,19 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
     SGPRImportant = false;
   }
 
-  // compare large regs pressure
+  // In regions with spilling, we should give prefernce to the schedule with
+  // less general RP.
+  if (Occ <= MFI.getMinWavesPerEU()) {
+    unsigned GPRPressure =
+        SGPRImportant ? getSGPRNum() : getVGPRNum(ST.hasGFX90AInsts());
+    unsigned OtherGPRPressure =
+        SGPRImportant ? O.getSGPRNum() : O.getVGPRNum(ST.hasGFX90AInsts());
+
+    if (GPRPressure != OtherGPRPressure)
+      return GPRPressure < OtherGPRPressure;
+  }
+
+  // Give second prefernce to less register tuple pressure
   bool SGPRFirst = SGPRImportant;
   for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
     if (SGPRFirst) {
@@ -130,6 +146,8 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
         return VW < OtherVW;
     }
   }
+
+  // Give final prefernce to less general RP
   return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
                          (getVGPRNum(ST.hasGFX90AInsts()) <
                           O.getVGPRNum(ST.hasGFX90AInsts()));
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index c750fe74749e2b3..2b428c44e46bb00 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -74,8 +74,9 @@ struct GCNRegPressure {
     return getOccupancy(ST) > O.getOccupancy(ST);
   }
 
-  bool less(const GCNSubtarget &ST, const GCNRegPressure& O,
-    unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
+  bool less(const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI,
+            const GCNRegPressure &O,
+            unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
 
   bool operator==(const GCNRegPressure &O) const {
     return std::equal(&Value[0], &Value[TOTAL_KINDS], O.Value);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index c3d60b635d3240a..975ba050e6be01f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1184,8 +1184,7 @@ bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
 
 bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
   if (WavesAfter <= MFI.getMinWavesPerEU() &&
-      !PressureAfter.less(ST, PressureBefore) &&
-      isRegionWithExcessRP()) {
+      !PressureAfter.less(ST, MFI, PressureBefore) && isRegionWithExcessRP()) {
     LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
     return true;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
new file mode 100644
index 000000000000000..567895c5fb434b2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
@@ -0,0 +1,353 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -march=amdgcn -mcpu=gfx90a -start-before=machine-scheduler -stop-after=machine-scheduler -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+--- |
+  define amdgpu_kernel void @spill_regpressure_less() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+...
+
+---
+name:            spill_regpressure_less
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       8
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: spill_regpressure_less
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF32:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF34:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF35:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF36:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF37:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF38:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF39:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF40:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF41:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF42:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF43:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF44:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF45:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF46:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF47:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF48:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF49:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF50:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF51:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF52:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF53:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF54:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF55:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF56:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF57:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF58:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF59:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF60:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF61:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF62:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF63:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF64:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF65:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF66:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]]
+    ; GCN-NEXT: KILL [[DEF]]
+    ; GCN-NEXT: KILL [[DEF1]]
+    ; GCN-NEXT: KILL [[DEF10]]
+    ; GCN-NEXT: KILL [[DEF12]]
+    ; GCN-NEXT: KILL [[DEF13]]
+    ; GCN-NEXT: KILL [[DEF14]]
+    ; GCN-NEXT: KILL [[DEF15]]
+    ; GCN-NEXT: KILL [[DEF16]]
+    ; GCN-NEXT: [[DEF67:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: KILL [[DEF17]]
+    ; GCN-NEXT: [[DEF68:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF69:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
+    ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF69]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]]
+    ; GCN-NEXT: KILL [[DEF2]]
+    ; GCN-NEXT: KILL [[DEF3]]
+    ; GCN-NEXT: KILL [[DEF4]]
+    ; GCN-NEXT: KILL [[DEF5]]
+    ; GCN-NEXT: KILL [[DEF6]]
+    ; GCN-NEXT: KILL [[DEF7]]
+    ; GCN-NEXT: KILL [[DEF8]]
+    ; GCN-NEXT: KILL [[DEF9]]
+    ; GCN-NEXT: KILL [[DEF18]]
+    ; GCN-NEXT: KILL [[DEF19]]
+    ; GCN-NEXT: [[DEF70:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
+    ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF70]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
+    ; GCN-NEXT: KILL [[DEF69]], implicit-def %70, implicit-def %71, implicit-def %72, implicit-def %73, implicit-def %74, implicit-def %75, implicit-def %76, implicit-def %77
+    ; GCN-NEXT: [[DEF71:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF72:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: KILL [[DEF20]]
+    ; GCN-NEXT: [[DEF73:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: KILL [[DEF11]]
+    ; GCN-NEXT: [[DEF74:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: KILL [[DEF21]]
+    ; GCN-NEXT: [[DEF75:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: KILL [[DEF22]]
+    ; GCN-NEXT: [[DEF76:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: KILL [[DEF23]]
+    ; GCN-NEXT: KILL [[DEF24]]
+    ; GCN-NEXT: KILL [[DEF25]]
+    ; GCN-NEXT: KILL [[DEF26]]
+    ; GCN-NEXT: KILL [[DEF27]]
+    ; GCN-NEXT: KILL [[DEF28]]
+    ; GCN-NEXT: KILL [[DEF29]]
+    ; GCN-NEXT: KILL [[DEF30]]
+    ; GCN-NEXT: KILL [[DEF31]]
+    ; GCN-NEXT: KILL [[DEF32]]
+    ; GCN-NEXT: KILL [[DEF33]]
+    ; GCN-NEXT: KILL [[DEF34]]
+    ; GCN-NEXT: KILL [[DEF35]]
+    ; GCN-NEXT: KILL [[DEF36]]
+    ; GCN-NEXT: KILL [[DEF37]]
+    ; GCN-NEXT: KILL [[DEF38]]
+    ; GCN-NEXT: KILL [[DEF39]]
+    ; GCN-NEXT: KILL [[DEF40]]
+    ; GCN-NEXT: KILL [[DEF41]]
+    ; GCN-NEXT: KILL [[DEF42]]
+    ; GCN-NEXT: KILL [[DEF43]]
+    ; GCN-NEXT: KILL [[DEF44]]
+    ; GCN-NEXT: KILL [[DEF45]]
+    ; GCN-NEXT: KILL [[DEF46]]
+    ; GCN-NEXT: KILL [[DEF47]]
+    ; GCN-NEXT: KILL [[DEF48]]
+    ; GCN-NEXT: KILL [[DEF49]]
+    ; GCN-NEXT: KILL [[DEF50]]
+    ; GCN-NEXT: KILL [[DEF51]]
+    ; GCN-NEXT: KILL [[DEF52]]
+    ; GCN-NEXT: KILL [[DEF53]]
+    ; GCN-NEXT: KILL [[DEF54]]
+    ; GCN-NEXT: KILL [[DEF55]]
+    ; GCN-NEXT: KILL [[DEF56]]
+    ; GCN-NEXT: KILL [[DEF57]]
+    ; GCN-NEXT: KILL [[DEF58]]
+    ; GCN-NEXT: KILL [[DEF59]]
+    ; GCN-NEXT: KILL [[DEF60]]
+    ; GCN-NEXT: KILL [[DEF61]]
+    ; GCN-NEXT: KILL [[DEF62]]
+    ; GCN-NEXT: KILL [[DEF63]]
+    ; GCN-NEXT: KILL [[DEF64]]
+    ; GCN-NEXT: KILL [[DEF65]]
+    ; GCN-NEXT: KILL [[DEF66]]
+    ; GCN-NEXT: KILL [[DEF67]]
+    ; GCN-NEXT: KILL [[DEF68]]
+    ; GCN-NEXT: KILL [[DEF71]]
+    ; GCN-NEXT: KILL [[DEF72]]
+    ; GCN-NEXT: KILL [[DEF73]]
+    ; GCN-NEXT: KILL [[DEF74]]
+    ; GCN-NEXT: KILL [[DEF75]]
+    ; GCN-NEXT: KILL [[DEF76]]
+    ; GCN-NEXT: KILL [[DEF70]]
+    ; GCN-NEXT: KILL %70
+    ; GCN-NEXT: KILL %71
+    ; GCN-NEXT: KILL %72
+    ; GCN-NEXT: KILL %73
+    ; GCN-NEXT: KILL %74
+    ; GCN-NEXT: KILL %75
+    ; GCN-NEXT: KILL %76
+    ; GCN-NEXT: KILL %77
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = IMPLICIT_DEF
+    %5:vgpr_32 = IMPLICIT_DEF
+    %6:vgpr_32 = IMPLICIT_DEF
+    %7:vgpr_32 = IMPLICIT_DEF
+    %8:vgpr_32 = IMPLICIT_DEF
+    %9:vgpr_32 = IMPLICIT_DEF
+    %10:vgpr_32 = IMPLICIT_DEF
+    %11:vgpr_32 = IMPLICIT_DEF
+    %12:vgpr_32 = IMPLICIT_DEF
+    %13:vgpr_32 = IMPLICIT_DEF
+    %14:vgpr_32 = IMPLICIT_DEF
+    %15:vgpr_32 = IMPLICIT_DEF
+    %16:vgpr_32 = IMPLICIT_DEF
+    %17:vgpr_32 = IMPLICIT_DEF
+    %18:vgpr_32 = IMPLICIT_DEF
+    %19:vgpr_32 = IMPLICIT_DEF
+    %20:vgpr_32 = IMPLICIT_DEF
+    %21:vgpr_32 = IMPLICIT_DEF
+    %22:vgpr_32 = IMPLICIT_DEF
+    %23:vgpr_32 = IMPLICIT_DEF
+    %24:vgpr_32 = IMPLICIT_DEF
+    %25:vgpr_32 = IMPLICIT_DEF
+    %26:vgpr_32 = IMPLICIT_DEF
+    %27:vgpr_32 = IMPLICIT_DEF
+    %28:vgpr_32 = IMPLICIT_DEF
+    %29:vgpr_32 = IMPLICIT_DEF
+    %30:vgpr_32 = IMPLICIT_DEF
+    %31:vgpr_32 = IMPLICIT_DEF
+    %32:vgpr_32 = IMPLICIT_DEF
+    %33:vgpr_32 = IMPLICIT_DEF
+    %34:vgpr_32 = IMPLICIT_DEF
+    %35:vgpr_32 = IMPLICIT_DEF
+    %36:vgpr_32 = IMPLICIT_DEF
+    %37:vgpr_32 = IMPLICIT_DEF
+    %38:vgpr_32 = IMPLICIT_DEF
+    %39:vgpr_32 = IMPLICIT_DEF
+    %40:vgpr_32 = IMPLICIT_DEF
+    %41:vgpr_32 = IMPLICIT_DEF
+    %42:vgpr_32 = IMPLICIT_DEF
+    %43:vgpr_32 = IMPLICIT_DEF
+    %44:vgpr_32 = IMPLICIT_DEF
+    %45:vgpr_32 = IMPLICIT_DEF
+    %46:vgpr_32 = IMPLICIT_DEF
+    %47:vgpr_32 = IMPLICIT_DEF
+    %48:vgpr_32 = IMPLICIT_DEF
+    %49:vgpr_32 = IMPLICIT_DEF
+    %50:vgpr_32 = IMPLICIT_DEF
+    %51:vgpr_32 = IMPLICIT_DEF
+    %52:vgpr_32 = IMPLICIT_DEF
+    %53:vgpr_32 = IMPLICIT_DEF
+    %54:vgpr_32 = IMPLICIT_DEF
+    %55:vgpr_32 = IMPLICIT_DEF
+    %56:vgpr_32 = IMPLICIT_DEF
+    %57:vgpr_32 = IMPLICIT_DEF
+    %58:vgpr_32 = IMPLICIT_DEF
+    %59:vgpr_32 = IMPLICIT_DEF
+    %60:vgpr_32 = IMPLICIT_DEF
+    %61:vgpr_32 = IMPLICIT_DEF
+    %62:vgpr_32 = IMPLICIT_DEF
+    %63:vgpr_32 = IMPLICIT_DEF
+    %64:vgpr_32 = IMPLICIT_DEF
+    %65:vgpr_32 = IMPLICIT_DEF
+    %66:vgpr_32 = IMPLICIT_DEF
+    %67:vgpr_32 = IMPLICIT_DEF
+    %68:vgpr_32 = IMPLICIT_DEF
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34, implicit %35, implicit %36, implicit %37, implicit %38, implicit %39, implicit %40, implicit %41, implicit %42, implicit %43, implicit %44, implicit %45, implicit %46, implicit %47, implicit %48, implicit %49, implicit %50, implicit %51, implicit %52, implicit %53, implicit %54, implicit %55, implicit %56, implicit %57, implicit %58, implicit %59, implicit %60, implicit %61, implicit %62, implicit %63, implicit %64, implicit %65, implicit %66
+    %69:sgpr_128 = IMPLICIT_DEF
+    INLINEASM &"", 1, implicit %69, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28
+    KILL %0
+    KILL %1
+    KILL %2
+    KILL %3
+    KILL %4
+    KILL %5
+    KILL %6
+    KILL %7
+    KILL %8
+    KILL %9
+    KILL %10
+    KILL %12
+    KILL %13
+    KILL %14
+    KILL %15
+    KILL %16
+    KILL %17
+    KILL %18
+    KILL %19
+    KILL %69:sgpr_128, implicit-def %77:vgpr_32, implicit-def %78:vgpr_32, implicit-def %79:vgpr_32, implicit-def %80:vgpr_32, implicit-def %81:vgpr_32, implicit-def %82:vgpr_32, implicit-def %83:vgpr_32, implicit-def %84:vgpr_32
+    %70:vgpr_32 = IMPLICIT_DEF
+    %71:vgpr_32 = IMPLICIT_DEF
+    %72:vgpr_32 = IMPLICIT_DEF
+    %73:vgpr_32 = IMPLICIT_DEF
+    %74:vgpr_32 = IMPLICIT_DEF
+    %75:vgpr_32 = IMPLICIT_DEF
+    %76:sgpr_128 = IMPLICIT_DEF
+    INLINEASM &"", 1, implicit %76, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+    KILL %20
+    KILL %11
+    KILL %21
+    KILL %22
+    KILL %23
+    KILL %24
+    KILL %25
+    KILL %26
+    KILL %27
+    KILL %28
+    KILL %29
+    KILL %30
+    KILL %31
+    KILL %32
+    KILL %33
+    KILL %34
+    KILL %35
+    KILL %36
+    KILL %37
+    KILL %38
+    KILL %39
+    KILL %40
+    KILL %41
+    KILL %42
+    KILL %43
+    KILL %44
+    KILL %45
+    KILL %46
+    KILL %47
+    KILL %48
+    KILL %49
+    KILL %50
+    KILL %51
+    KILL %52
+    KILL %53
+    KILL %54
+    KILL %55
+    KILL %56
+    KILL %57
+    KILL %58
+    KILL %59
+    KILL %60
+    KILL %61
+    KILL %62
+    KIL...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/71882


More information about the llvm-commits mailing list