[llvm] [AMDGPU] Fix GCNUpwardRPTracker: max register pressure on defs. (PR #74422)
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 23:12:13 PST 2023
https://github.com/vpykhtin created https://github.com/llvm/llvm-project/pull/74422
Treat a defined register as fully live "at" the instruction and update maximum pressure accordingly.
Fixes https://github.com/llvm/llvm-project/pull/73786. First part from https://github.com/llvm/llvm-project/pull/74328.
Tests will be presubmitted separately; please take a look at https://github.com/llvm/llvm-project/commit/6223a029b6800049c689a2468d0f8781447c14de for the diff.
>From 0f6ac3807b64d26fe6fede87ae33c655f33436fe Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 5 Dec 2023 07:35:18 +0100
Subject: [PATCH 1/2] [AMDGPU] Presubmit test: max pressure on defs.
---
.../CodeGen/AMDGPU/regpressure_printer.mir | 115 +++++++++++++++++-
1 file changed, 113 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index a1722c42b189f..83e85ccf7f8f1 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null -verify-machineinstrs --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null -verify-machineinstrs --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
---
@@ -666,3 +666,114 @@ body: |
EXP_DONE 0, %49:vgpr_32, undef %51:vgpr_32, undef %53:vgpr_32, undef %55:vgpr_32, -1, 0, 1, implicit $exec
S_ENDPGM 0
...
+---
+name: test_partially_used_def
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-LABEL: name: test_partially_used_def
+ ; RPU: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 4 0 %1:sgpr_128 = COPY %0:sgpr_128
+ ; RPU-NEXT: 1 0
+ ; RPU-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: 0 0
+ ;
+ ; RPD-LABEL: name: test_partially_used_def
+ ; RPD: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 8 0 %1:sgpr_128 = COPY %0:sgpr_128
+ ; RPD-NEXT: 1 0
+ ; RPD-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: 0 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr_128 = COPY %0:sgpr_128
+ S_NOP 0, implicit %1.sub1
+...
+---
+name: test_partially_used_early_clobber_def
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-LABEL: name: test_partially_used_early_clobber_def
+ ; RPU: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 5 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
+ ; RPU-NEXT: 1 0
+ ; RPU-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: 0 0
+ ;
+ ; RPD-LABEL: name: test_partially_used_early_clobber_def
+ ; RPD: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
+ ; RPD-NEXT: 1 0
+ ; RPD-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: 0 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_128 = COPY %0:sgpr_128
+ S_NOP 0, implicit %1.sub1
+...
+---
+name: test_partially_used_def_and_early_clobber_def
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-LABEL: name: test_partially_used_def_and_early_clobber_def
+ ; RPU: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 7 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
+ ; RPU-NEXT: 6 0
+ ; RPU-NEXT: 6 0 S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: 0 0
+ ;
+ ; RPD-LABEL: name: test_partially_used_def_and_early_clobber_def
+ ; RPD: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 20 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
+ ; RPD-NEXT: 6 0
+ ; RPD-NEXT: 6 0 S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: 0 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def early-clobber %4:sgpr_128
+ S_NOP 0, implicit %1.sub1, implicit %2.sub0_sub1, implicit %3.sub0_sub1_sub2
+...
>From 6223a029b6800049c689a2468d0f8781447c14de Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 5 Dec 2023 07:36:40 +0100
Subject: [PATCH 2/2] [AMDGPU] Fix GCNUpwardRPTracker: max pressure on defs.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 50 ++---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 26 +++
.../CodeGen/AMDGPU/regpressure_printer.mir | 171 ++++++------------
3 files changed, 112 insertions(+), 135 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5ebf834377f2c..fd8f0bebd3bec 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -274,32 +274,42 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (MI.isDebugInstr())
return;
- auto DecrementDef = [this](const MachineOperand &MO) {
+ // Kill all defs.
+ GCNRegPressure DefPressure, ECDefPressure;
+ bool HasECDefs = false;
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
+ continue;
+
Register Reg = MO.getReg();
+ LaneBitmask DefMask = getDefRegMask(MO, *MRI);
+
+ // Treat a def as fully live at the moment of definition: keep a record.
+ if (MO.isEarlyClobber()) {
+ ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+ HasECDefs = true;
+ } else
+ DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+
auto I = LiveRegs.find(Reg);
if (I == LiveRegs.end())
- return;
+ continue;
LaneBitmask &LiveMask = I->second;
LaneBitmask PrevMask = LiveMask;
- LiveMask &= ~getDefRegMask(MO, *MRI);
+ LiveMask &= ~DefMask;
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
LiveRegs.erase(I);
- };
-
- // Decrement non-early-clobber defs.
- SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
- for (const MachineOperand &MO : MI.all_defs()) {
- if (!MO.getReg().isVirtual())
- continue;
- if (!MO.isEarlyClobber())
- DecrementDef(MO);
- else
- EarlyClobberDefs.push_back(&MO);
}
- // Increment uses.
+ // Update MaxPressure with defs pressure.
+ DefPressure += CurPressure;
+ if (HasECDefs)
+ DefPressure += ECDefPressure;
+ MaxPressure = max(DefPressure, MaxPressure);
+
+ // Make uses alive.
SmallVector<RegisterMaskPair, 8> RegUses;
collectVirtualRegUses(RegUses, MI, LIS, *MRI);
for (const RegisterMaskPair &U : RegUses) {
@@ -309,13 +319,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
}
- // Point of maximum pressure: non-early-clobber defs are decremented and uses
- // are incremented.
- MaxPressure = max(CurPressure, MaxPressure);
-
- // Now decrement early clobber defs.
- for (const MachineOperand *MO : EarlyClobberDefs)
- DecrementDef(*MO);
+ // Update MaxPressure with uses plus early-clobber defs pressure.
+ MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
+ : max(CurPressure, MaxPressure);
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index e21bf10d795ba..4100970fe1a96 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -85,6 +85,18 @@ struct GCNRegPressure {
return !(*this == O);
}
+ GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] += RHS.Value[I];
+ return *this;
+ }
+
+ GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] -= RHS.Value[I];
+ return *this;
+ }
+
void dump() const;
private:
@@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
return Res;
}
+inline GCNRegPressure operator+(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Sum = P1;
+ Sum += P2;
+ return Sum;
+}
+
+inline GCNRegPressure operator-(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Diff = P1;
+ Diff -= P2;
+ return Diff;
+}
+
class GCNRPTracker {
public:
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index 83e85ccf7f8f1..f0c5ba489ef3d 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -47,87 +47,46 @@ body: |
name: live_through_test
tracksRegLiveness: true
body: |
- ; RPU-LABEL: name: live_through_test
- ; RPU: bb.0:
- ; RPU-NEXT: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: Live-out: %0:00000000000000F3
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: bb.1:
- ; RPU-NEXT: Live-in: %0:00000000000000F3
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: Live-out: %0:00000000000000C3
- ; RPU-NEXT: Live-thr: %0:00000000000000C0
- ; RPU-NEXT: 1 0
- ; RPU-NEXT: bb.2:
- ; RPU-NEXT: Live-in: %0:00000000000000C3
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ;
- ; RPD-LABEL: name: live_through_test
- ; RPD: bb.0:
- ; RPD-NEXT: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: Live-out: %0:00000000000000F3
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: bb.1:
- ; RPD-NEXT: Live-in: %0:00000000000000F3
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: Live-out: %0:00000000000000C3
- ; RPD-NEXT: Live-thr: %0:00000000000000C0
- ; RPD-NEXT: 1 0
- ; RPD-NEXT: bb.2:
- ; RPD-NEXT: Live-in: %0:00000000000000C3
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
+ ; RP-LABEL: name: live_through_test
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: Live-out: %0:00000000000000F3
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:00000000000000F3
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: Live-out: %0:00000000000000C3
+ ; RP-NEXT: Live-thr: %0:00000000000000C0
+ ; RP-NEXT: 1 0
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-in: %0:00000000000000C3
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
bb.0:
%0:sgpr_128 = IMPLICIT_DEF
bb.1:
@@ -223,7 +182,7 @@ body: |
; RPU-NEXT: 0 7
; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 6
- ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 7
; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 9
@@ -262,7 +221,7 @@ body: |
; RPU-NEXT: 0 12
; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
- ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
; RPU-NEXT: 0 9
@@ -550,7 +509,7 @@ body: |
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
; RPU-NEXT: 0 0
- ; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
+ ; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
; RPU-NEXT: 0 0
@@ -569,7 +528,7 @@ body: |
; RPU-NEXT: 0 1
; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0
; RPU-NEXT: 0 1
- ; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
+ ; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
; RPU-NEXT: 0 1
; RPU-NEXT: Live-out: %0:0000000000000C00
; RPU-NEXT: Live-thr:
@@ -709,33 +668,19 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
- ; RPU-LABEL: name: test_partially_used_early_clobber_def
- ; RPU: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; RPU-NEXT: 4 0
- ; RPU-NEXT: 5 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
- ; RPU-NEXT: 1 0
- ; RPU-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ;
- ; RPD-LABEL: name: test_partially_used_early_clobber_def
- ; RPD: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; RPD-NEXT: 4 0
- ; RPD-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
- ; RPD-NEXT: 1 0
- ; RPD-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
+ ; RP-LABEL: name: test_partially_used_early_clobber_def
+ ; RP: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RP-NEXT: 4 0
+ ; RP-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
+ ; RP-NEXT: 1 0
+ ; RP-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
early-clobber %1:sgpr_128 = COPY %0:sgpr_128
S_NOP 0, implicit %1.sub1
@@ -752,7 +697,7 @@ body: |
; RPU-NEXT: 0 0
; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; RPU-NEXT: 4 0
- ; RPU-NEXT: 7 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
+ ; RPU-NEXT: 16 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
; RPU-NEXT: 6 0
; RPU-NEXT: 6 0 S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128
; RPU-NEXT: 0 0
More information about the llvm-commits mailing list