[llvm] [AMDGPU] Do not remat instructions with PhysReg uses (PR #124366)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 15:30:17 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jeffrey Byrnes (jrbyrnes)
<details>
<summary>Changes</summary>
This blocks rematerialization during scheduling if the instruction has a non accepted PhysReg use.
Currently, there aren't any checks like this in place, and we may create invalid code: https://godbolt.org/z/xjPjdcorf
---
Patch is 39.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124366.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+10-2)
- (modified) llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir (+408-48)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index b00105ae9bd528..6e693066de10b6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1842,15 +1842,23 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
return true;
}
-// Copied from MachineLICM
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
if (!DAG.TII->isTriviallyReMaterializable(MI))
return false;
- for (const MachineOperand &MO : MI.all_uses())
+ for (const MachineOperand &MO : MI.all_uses()) {
if (MO.getReg().isVirtual())
return false;
+ // We can't remat physreg uses, unless it is a constant or an ignorable
+ // use (e.g. implicit exec use on VALU instructions)
+ if (MO.getReg().isPhysical()) {
+ if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
+ continue;
+ return false;
+ }
+ }
+
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 9f264de531950b..7662abc0aaf858 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -84,13 +84,11 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
S_NOP 0
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -191,14 +189,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -300,7 +296,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -308,7 +303,6 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -408,7 +402,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -416,7 +409,6 @@ body: |
S_NOP 0, implicit %22, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -529,7 +521,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -537,14 +528,12 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
%25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -552,7 +541,6 @@ body: |
S_NOP 0, implicit %25
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -666,7 +654,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -674,7 +661,6 @@ body: |
S_NOP 0, implicit %23, implicit %22
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
%25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
@@ -682,7 +668,6 @@ body: |
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
@@ -690,7 +675,6 @@ body: |
S_NOP 0, implicit %25, implicit %26
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -949,14 +933,12 @@ body: |
undef %23.sub0:vreg_64 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23.sub1:vreg_64 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -1053,7 +1035,6 @@ body: |
undef %21.sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
@@ -1062,7 +1043,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -1581,7 +1561,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -1589,7 +1568,6 @@ body: |
S_NOP 0, implicit %24, implicit %25
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -2528,14 +2506,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
S_NOP 0, implicit %23
@@ -2543,7 +2519,6 @@ body: |
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -2551,7 +2526,6 @@ body: |
S_NOP 0, implicit %26, implicit %27
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %25
S_NOP 0, implicit %0, implicit %1
@@ -2650,7 +2624,6 @@ body: |
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -2658,7 +2631,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -2759,7 +2731,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -2767,7 +2738,6 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -5030,7 +5000,6 @@ body: |
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5038,7 +5007,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5137,14 +5105,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %23, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5242,7 +5208,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5250,7 +5215,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5348,7 +5312,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5357,7 +5320,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5456,7 +5418,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5466,7 +5427,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5562,14 +5522,12 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %22, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5669,14 +5627,12 @@ body: |
undef %23.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23.sub1
S_NOP 0, implicit %0, implicit %1
@@ -5779,14 +5735,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
DBG_VALUE %23, 0, 0
S_NOP 0, implicit %23
@@ -5889,14 +5843,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -5914,3 +5866,411 @@ body: |
S_ENDPGM 0
...
+---
+name: test_occ_8_physreg_use
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: test_occ_8_physreg_use
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
+ ; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_9]], implicit [[V_CVT_I32_F32_e32_17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_10]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/124366
More information about the llvm-commits
mailing list