[llvm] 547e3cb - [AMDGPU] Improve liveness copying in si-optimize-exec-masking-pre-ra
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 17 01:34:45 PDT 2022
Author: Carl Ritson
Date: 2022-07-17T17:34:05+09:00
New Revision: 547e3cba7d166286c3c4ce477998a81ebbed6921
URL: https://github.com/llvm/llvm-project/commit/547e3cba7d166286c3c4ce477998a81ebbed6921
DIFF: https://github.com/llvm/llvm-project/commit/547e3cba7d166286c3c4ce477998a81ebbed6921.diff
LOG: [AMDGPU] Improve liveness copying in si-optimize-exec-masking-pre-ra
Further improve liveness copying for the CC register after the optimization
by mirroring live internal splits.
This fixes a bug in register allocation that occurs when CC register liveness
is extended across branches instead of being split.
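
As a rough illustration of what "mirroring live internal splits" means
(a minimal sketch, not the committed code; SrcLI, Dst, DefIdx, UseIdx and
VNI are illustrative names): instead of adding one long segment from the
v_cndmask_b32 definition to the s_and use, every segment of the source
interval from the definition onward is copied onto the condition
register's live range, so segments that already end at a block boundary
keep ending there.

    // Sketch: copy SrcLI's segments from DefIdx onward onto Dst.
    // Segments ending at a block boundary (internal splits, e.g. loop
    // back-edges) are kept intact; others are clamped at the use.
    static void mirrorSegments(const LiveInterval &SrcLI, LiveRange &Dst,
                               SlotIndex DefIdx, SlotIndex UseIdx,
                               VNInfo *VNI) {
      for (const LiveRange::Segment &S : SrcLI) {
        if (S.end <= DefIdx)
          continue; // Entirely before the mirrored definition.
        SlotIndex Start = S.start < DefIdx ? DefIdx : S.start;
        SlotIndex End = (S.end < UseIdx || S.end.isBlock()) ? S.end : UseIdx;
        Dst.addSegment(LiveRange::Segment(Start, End, VNI));
      }
    }

This corresponds to the applyLiveRanges lambda in the diff below, which
additionally handles the case where the source interval does not reach
the s_and because the compare killed it.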
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D129557
Added:
llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir
Modified:
llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 57dbad468de8..aed84437b890 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -184,6 +184,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
return false;
+ // Cannot safely mirror live intervals with PHI nodes, so check for these
+ // before optimization.
+ SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
+ LiveInterval *SelLI = &LIS->getInterval(SelReg);
+ if (llvm::any_of(SelLI->vnis(),
+ [](const VNInfo *VNI) {
+ return VNI->isPHIDef();
+ }))
+ return false;
+
// TODO: Guard against implicit def operands?
LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
<< *And);
@@ -204,31 +214,34 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
- SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
- SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
-
- LiveInterval *CmpLI =
- CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
- LiveInterval *SelLI =
- SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
-
// Update live intervals for CCReg before potentially removing CmpReg/SelReg,
// and their associated liveness information.
+ SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
if (CCReg.isVirtual()) {
- // Note: this ignores that SelLI might have multiple internal values
- // or splits and simply extends the live range to cover all cases
- // where the result of the v_cndmask_b32 was live (e.g. loops).
- // This could yield worse register allocation in rare edge cases.
- SlotIndex EndIdx = AndIdx.getRegSlot();
- if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock())
- EndIdx = SelLI->endIndex();
+ // Apply live ranges from SelLI to CCReg potentially matching splits
+ // and extending to loop boundaries.
+
+ auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) {
+ // Copy live ranges from SelLI, adjusting start and end as required
+ auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
+ assert(DefSegment != SelLI->end() &&
+ "No live interval segment covering definition?");
+ for (auto I = DefSegment; I != SelLI->end(); ++I) {
+ SlotIndex Start = I->start < SelIdx.getRegSlot() ?
+ SelIdx.getRegSlot() : I->start;
+ SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
+ I->end : AndIdx.getRegSlot();
+ Dst.addSegment(LiveRange::Segment(Start, End, VNI));
+ }
+ // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
+ if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
+ Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
+ };
LiveInterval &CCLI = LIS->getInterval(CCReg);
auto CCQ = CCLI.Query(SelIdx.getRegSlot());
- if (CCQ.valueIn()) {
- CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
- EndIdx, CCQ.valueIn()));
- }
+ if (CCQ.valueIn())
+ applyLiveRanges(CCLI, CCQ.valueIn());
if (CC->getSubReg()) {
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
@@ -237,10 +250,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
Allocator, Mask,
[=](LiveInterval::SubRange &SR) {
auto CCQS = SR.Query(SelIdx.getRegSlot());
- if (CCQS.valueIn()) {
- SR.addSegment(LiveRange::Segment(
- SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
- }
+ if (CCQS.valueIn())
+ applyLiveRanges(SR, CCQS.valueIn());
},
*LIS->getSlotIndexes(), *TRI);
CCLI.removeEmptySubRanges();
@@ -253,7 +264,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
// Try to remove the compare. The Cmp value must not be used between the cmp
// and s_and_b64 if it is VCC, or must simply be unused for any other register.
- if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
+ LiveInterval *CmpLI = CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+ if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
(CmpReg == Register(CondReg) &&
std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
[&](const MachineInstr &MI) {
@@ -266,18 +278,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
Cmp->eraseFromParent();
// Try to remove v_cndmask_b32.
- if (SelLI) {
- // Kill status must be checked before shrinking the live range.
- bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
- LIS->shrinkToUses(SelLI);
- bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
- if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
- LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
-
- LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
- LIS->RemoveMachineInstrFromMaps(*Sel);
- Sel->eraseFromParent();
- }
+ // Kill status must be checked before shrinking the live range.
+ bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+ LIS->shrinkToUses(SelLI);
+ bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+ if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+
+ LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
+ LIS->RemoveMachineInstrFromMaps(*Sel);
+ Sel->eraseFromParent();
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir
new file mode 100644
index 000000000000..703ad549d6cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir
@@ -0,0 +1,293 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -start-before=machine-scheduler -stop-after=virtregrewriter,0 -o - %s | FileCheck %s
+
+---
+# If optimize-exec-mask-pre-ra over-approximates live intervals (i.e. does
+# not replicate splits) then this triggers a register allocation failure.
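+#
+# Each v_cndmask_b32 / v_cmp / s_and_b32-with-exec triple here is a fold
+# candidate for the pass: %29 (bb.1) pairs with the V_CMP_NE_U32 in bb.7,
+# and %74-%76 (bb.6) pair with %150-%152 in the bb.14-bb.17 loops, giving
+# the folded condition registers live ranges with internal splits.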
+
+# CHECK-LABEL: name: test
+
+name: test
+alignment: 1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_32, preferred-register: '$vcc_lo' }
+ - { id: 1, class: sreg_32, preferred-register: '$vcc_lo' }
+ - { id: 2, class: sreg_32_xm0_xexec, preferred-register: '$vcc_lo' }
+liveins:
+ - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0.entry:
+ liveins: $vgpr0, $sgpr4_sgpr5
+
+ %3:sgpr_64 = COPY $sgpr4_sgpr5
+ %4:vgpr_32 = COPY $vgpr0
+ %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 16, 0
+ S_BITCMP1_B32 %5, 0, implicit-def $scc
+ %6:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ %7:sreg_32 = S_MOV_B32 -1
+ %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 136, 0
+ S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 8, 0
+ %10:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 24, 0
+ %11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 40, 0
+ %12:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 48, 0
+ %13:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 64, 0
+ S_BITCMP1_B32 %11, 0, implicit-def $scc
+ %14:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ S_BITCMP1_B32 %13, 0, implicit-def $scc
+ %15:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ S_BITCMP1_B32 %13, 8, implicit-def $scc
+ %2:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ %16:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 72, 0
+ %17:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 80, 0
+ S_BITCMP1_B32 %17, 0, implicit-def $scc
+ %18:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ S_BITCMP1_B32 %17, 8, implicit-def $scc
+ %19:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ %20:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 88, 0
+ %21:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 104, 0
+ %22:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 120, 0
+ S_BITCMP1_B32 %22, 0, implicit-def $scc
+ %23:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ %24:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 128, 0
+ %25:sreg_64 = S_MOV_B64 0
+ %26:sreg_64_xexec = S_LOAD_DWORDX2_IMM %25, 0, 0
+ %27:sreg_64 = S_MOV_B64_IMM_PSEUDO 4652218415073722368
+ %28:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ %29:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %14, implicit $exec
+ %30:sreg_64 = S_MOV_B64_IMM_PSEUDO 4358002977218854975
+ undef %31.sub1:sreg_64 = S_MOV_B32 -1064252416
+ %32:sreg_32 = S_OR_B32 %19, %18, implicit-def dead $scc
+ undef %33.sub1:sreg_64 = S_MOV_B32 2146435072
+ %34:sreg_64 = S_MOV_B64_IMM_PSEUDO 4592094252754343337
+ %35:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593089322246397463
+ %36:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593150332132823898
+ %37:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593971714784152002
+ %38:sreg_64 = S_MOV_B64_IMM_PSEUDO 4594710915293070409
+ %39:sreg_64 = S_MOV_B64_IMM_PSEUDO 4595718710613720112
+ %40:sreg_64 = S_MOV_B64_IMM_PSEUDO 4597174419628462798
+ %41:sreg_64 = S_MOV_B64_IMM_PSEUDO 4598818590920614106
+ %42:sreg_64 = S_MOV_B64_IMM_PSEUDO 4600877379321698716
+ %43:sreg_64 = S_MOV_B64_IMM_PSEUDO 4604180019048437077
+ undef %44.sub1:sreg_64 = S_MOV_B32 -1075489451
+ %45:sreg_64 = S_MOV_B64_IMM_PSEUDO 4609176140021203710
+ undef %46.sub1:sreg_64 = S_MOV_B32 -1132807010
+ %47:sreg_64 = S_MOV_B64_IMM_PSEUDO 4508818957471820556
+ %48:sreg_64 = S_MOV_B64_IMM_PSEUDO 4493147761815702327
+ %49:sreg_64 = S_MOV_B64_IMM_PSEUDO 4523617260404727396
+ %50:sreg_64 = S_MOV_B64_IMM_PSEUDO 4537941333260232368
+ %51:sreg_64 = S_MOV_B64_IMM_PSEUDO 4551452160460988270
+ %52:sreg_64 = S_MOV_B64_IMM_PSEUDO 4564047942395279280
+ %53:sreg_64 = S_MOV_B64_IMM_PSEUDO 4575957461383652130
+ %54:sreg_64 = S_MOV_B64_IMM_PSEUDO 4586165620538933921
+ %55:sreg_64 = S_MOV_B64_IMM_PSEUDO 4595172819793696017
+ %56:sreg_64 = S_MOV_B64_IMM_PSEUDO 4602678819172646923
+ undef %57.sub1:sreg_64 = S_MOV_B32 -1101341163
+ %7:sreg_32 = IMPLICIT_DEF
+ %58:sreg_32 = IMPLICIT_DEF
+ %59:sreg_32 = COPY %27.sub0
+ %60:vreg_64 = COPY %28
+ %61:vreg_64 = COPY %28
+ %62:vreg_64 = COPY %28
+ %63:vreg_64 = COPY %28
+ %64:vreg_64 = COPY %28
+ S_BRANCH %bb.3
+
+ bb.2:
+ %65:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ %66:sreg_32 = S_AND_B32 %65, %7, implicit-def dead $scc
+ $exec_lo = S_MOV_B32_term %66
+ S_CBRANCH_EXECZ %bb.18, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.3:
+ %67:sreg_32 = S_AND_B32 $exec_lo, %6, implicit-def dead $scc
+ $vcc_lo = COPY %67
+ %58:sreg_32 = S_OR_B32 %58, $exec_lo, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.7, implicit killed $vcc
+
+ bb.4:
+ %64:vreg_64 = IMPLICIT_DEF
+ %63:vreg_64 = IMPLICIT_DEF
+ %62:vreg_64 = IMPLICIT_DEF
+ %61:vreg_64 = IMPLICIT_DEF
+ %60:vreg_64 = IMPLICIT_DEF
+ %28:vreg_64 = IMPLICIT_DEF
+ %68:sreg_32 = S_MOV_B32 -1
+ S_BRANCH %bb.9
+
+ bb.5:
+ S_CBRANCH_SCC1 %bb.18, implicit undef $scc
+
+ bb.6:
+ %69:sreg_32_xm0_xexec = S_XOR_B32 %6, -1, implicit-def dead $scc
+ %70:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 96, 0
+ S_BITCMP1_B32 %70, 0, implicit-def $scc
+ %71:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ S_BITCMP1_B32 %8.sub1, 0, implicit-def $scc
+ %72:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ %73:sreg_32_xm0_xexec = S_XOR_B32 %72, -1, implicit-def dead $scc
+ %74:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %73, implicit $exec
+ %75:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %71, implicit $exec
+ %76:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %69, implicit $exec
+ S_BRANCH %bb.14
+
+ bb.7:
+ %77:vreg_64 = COPY %10.sub0_sub1
+ %78:vreg_64 = FLAT_LOAD_DWORDX2 %77, 0, 0, implicit $exec, implicit $flat_scr
+ %79:vreg_64 = COPY %10.sub2_sub3
+ %80:vreg_64 = FLAT_LOAD_DWORDX2 %79, 0, 0, implicit $exec, implicit $flat_scr
+ %81:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %29, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %81, implicit-def dead $scc
+ %82:sreg_64 = COPY %12.sub0_sub1
+ S_CBRANCH_VCCNZ %bb.10, implicit killed $vcc
+ S_BRANCH %bb.8
+
+ bb.8:
+ %82:sreg_64 = S_MOV_B64 0
+ S_BRANCH %bb.10
+
+ bb.9:
+ %83:sreg_32 = S_XOR_B32 %68, -1, implicit-def dead $scc
+ %84:sreg_32 = S_AND_B32 $exec_lo, %58, implicit-def $scc
+ %59:sreg_32 = S_OR_B32 %84, %59, implicit-def $scc
+ %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %85:sreg_32 = S_ANDN2_B32 %7, $exec_lo, implicit-def dead $scc
+ %86:sreg_32 = S_AND_B32 %83, $exec_lo, implicit-def dead $scc
+ %7:sreg_32 = S_OR_B32 %85, %86, implicit-def dead $scc
+ $exec_lo = S_ANDN2_B32_term $exec_lo, %59, implicit-def $scc
+ S_CBRANCH_EXECNZ %bb.3, implicit $exec
+ S_BRANCH %bb.19
+
+ bb.10:
+ %87:sreg_64_xexec = S_LOAD_DWORDX2_IMM %26, 16, 0
+ undef %88.sub1:sreg_64 = S_AND_B32 %87.sub1, 2147483647, implicit-def dead $scc
+ %89:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e64 2, %87, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_64 = V_CVT_F64_I32_e32 %89, implicit $mode, implicit $exec
+ %91:vreg_64 = nofpexcept V_FMA_F64_e64 0, 0, 0, %30, 0, %87, 0, 0, implicit $mode, implicit $exec
+ %0:sreg_32 = nofpexcept V_CMP_LT_F64_e64 0, %27, 0, %91, 0, implicit $mode, implicit $exec
+ %31.sub0:sreg_64 = COPY %27.sub0
+ %1:sreg_32 = nofpexcept V_CMP_GT_F64_e64 0, %31, 0, %90, 0, implicit $mode, implicit $exec
+ S_CBRANCH_SCC0 %bb.12, implicit undef $scc
+
+ bb.11:
+ %92:sreg_32_xm0_xexec = S_OR_B32 %1, %0, implicit-def dead $scc
+ undef %93.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %21.sub3, 0, 0, %92, implicit $exec
+ %93.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %21.sub2, 0, 0, %92, implicit $exec
+ S_BRANCH %bb.13
+
+ bb.12:
+ %93:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+
+ bb.13:
+ %88.sub0:sreg_64 = COPY %87.sub0
+ %94:vgpr_32 = COPY %16.sub0
+ %95:vgpr_32 = V_CNDMASK_B32_e64 0, %82.sub0, 0, %94, %2, implicit $exec
+ %96:vgpr_32 = COPY %16.sub1
+ %97:vgpr_32 = V_CNDMASK_B32_e64 0, %82.sub1, 0, %96, %2, implicit $exec
+ %98:vgpr_32 = V_CNDMASK_B32_e64 0, %97, 0, 2146959360, %18, implicit $exec
+ dead %99:sreg_32 = S_AND_B32 %32, $exec_lo, implicit-def $scc
+ %100:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+ undef %101.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %95, 0, 0, %100, implicit $exec
+ %101.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %98, 0, 0, %19, implicit $exec
+ %64:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %64, 0, %101, 0, 0, implicit $mode, implicit $exec
+ %63:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %63, 0, %21.sub0_sub1, 0, 0, implicit $mode, implicit $exec
+ %62:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %62, 0, %12.sub2_sub3, 0, 0, implicit $mode, implicit $exec
+ %33.sub0:sreg_64 = COPY %27.sub0
+ %102:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, %33, 0, %88, 0, implicit $mode, implicit $exec
+ %103:sreg_32 = nofpexcept V_CMP_EQ_F64_e64 0, 0, 0, %87, 0, implicit $mode, implicit $exec
+ %104:sreg_32 = S_XOR_B32 %15, %103, implicit-def dead $scc
+ dead %105:sreg_32 = S_AND_B32 %104, $exec_lo, implicit-def $scc
+ %106:sgpr_32 = S_CSELECT_B32 0, 2146435072, implicit killed $scc
+ %107:vgpr_32 = V_CNDMASK_B32_e64 0, %93.sub1, 0, %106, %102, implicit $exec
+ undef %108.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %107, 0, 0, %23, implicit $exec
+ %109:sreg_32_xm0_xexec = S_OR_B32 %23, %102, implicit-def dead $scc
+ %108.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %93.sub0, 0, 0, %109, implicit $exec
+ %61:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %61, 0, %108, 0, 0, implicit $mode, implicit $exec
+ %110:vreg_64 = nofpexcept V_FMA_F64_e64 0, %78, 0, %35, 0, %34, 0, 0, implicit $mode, implicit $exec
+ %111:vreg_64 = nofpexcept V_FMA_F64_e64 0, %110, 0, 0, 0, %36, 0, 0, implicit $mode, implicit $exec
+ %112:vreg_64 = nofpexcept V_FMA_F64_e64 0, %111, 0, 0, 0, %37, 0, 0, implicit $mode, implicit $exec
+ %113:vreg_64 = nofpexcept V_FMA_F64_e64 0, %112, 0, 0, 0, %38, 0, 0, implicit $mode, implicit $exec
+ %114:vreg_64 = nofpexcept V_FMA_F64_e64 0, %113, 0, 0, 0, %39, 0, 0, implicit $mode, implicit $exec
+ %115:vreg_64 = nofpexcept V_FMA_F64_e64 0, %114, 0, 0, 0, %40, 0, 0, implicit $mode, implicit $exec
+ %116:vreg_64 = nofpexcept V_FMA_F64_e64 0, %115, 0, 0, 0, %41, 0, 0, implicit $mode, implicit $exec
+ %117:vreg_64 = nofpexcept V_FMA_F64_e64 0, %116, 0, 0, 0, %42, 0, 0, implicit $mode, implicit $exec
+ %118:vreg_64 = nofpexcept V_ADD_F64_e64 0, %117, 0, %43, 0, 0, implicit $mode, implicit $exec
+ %44.sub0:sreg_64 = COPY %43.sub0
+ %119:vreg_64 = nofpexcept V_ADD_F64_e64 0, %118, 0, %44, 0, 0, implicit $mode, implicit $exec
+ %120:vreg_64 = nofpexcept V_ADD_F64_e64 0, %24, 0, %119, 0, 0, implicit $mode, implicit $exec
+ %121:vreg_64 = nofpexcept V_MUL_F64_e64 0, %9, 0, %120, 0, 0, implicit $mode, implicit $exec
+ %122:vreg_64 = nofpexcept V_MUL_F64_e64 0, %121, 0, %45, 0, 0, implicit $mode, implicit $exec
+ %46.sub0:sreg_64 = COPY %30.sub0
+ %123:vreg_64 = nofpexcept V_FMA_F64_e64 0, %122, 0, %46, 0, %20, 0, 0, implicit $mode, implicit $exec
+ %124:vreg_64 = nofpexcept V_FMA_F64_e64 0, %123, 0, %48, 0, %47, 0, 0, implicit $mode, implicit $exec
+ %125:vreg_64 = nofpexcept V_FMA_F64_e64 0, %124, 0, 0, 0, %49, 0, 0, implicit $mode, implicit $exec
+ %126:vreg_64 = nofpexcept V_FMA_F64_e64 0, %125, 0, 0, 0, %50, 0, 0, implicit $mode, implicit $exec
+ %127:vreg_64 = nofpexcept V_FMA_F64_e64 0, %126, 0, 0, 0, %51, 0, 0, implicit $mode, implicit $exec
+ %128:vreg_64 = nofpexcept V_FMA_F64_e64 0, %127, 0, 0, 0, %52, 0, 0, implicit $mode, implicit $exec
+ %129:vreg_64 = nofpexcept V_FMA_F64_e64 0, %128, 0, 0, 0, %53, 0, 0, implicit $mode, implicit $exec
+ %130:vreg_64 = nofpexcept V_FMA_F64_e64 0, %129, 0, 0, 0, %54, 0, 0, implicit $mode, implicit $exec
+ %131:vreg_64 = nofpexcept V_FMA_F64_e64 0, %130, 0, 0, 0, %55, 0, 0, implicit $mode, implicit $exec
+ %132:vreg_64 = nofpexcept V_FMA_F64_e64 0, %131, 0, 0, 0, %56, 0, 0, implicit $mode, implicit $exec
+ %60:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %60, 0, %132, 0, 0, implicit $mode, implicit $exec
+ %133:vreg_64 = nofpexcept V_FMA_F64_e64 0, %80, 0, 0, 0, %36, 0, 0, implicit $mode, implicit $exec
+ %134:vreg_64 = nofpexcept V_FMA_F64_e64 0, %133, 0, 0, 0, %37, 0, 0, implicit $mode, implicit $exec
+ %135:vreg_64 = nofpexcept V_FMA_F64_e64 0, %134, 0, 0, 0, %38, 0, 0, implicit $mode, implicit $exec
+ %136:vreg_64 = nofpexcept V_FMA_F64_e64 0, %135, 0, 0, 0, %39, 0, 0, implicit $mode, implicit $exec
+ %137:vreg_64 = nofpexcept V_FMA_F64_e64 0, %136, 0, 0, 0, %40, 0, 0, implicit $mode, implicit $exec
+ %138:vreg_64 = nofpexcept V_FMA_F64_e64 0, %137, 0, 0, 0, %41, 0, 0, implicit $mode, implicit $exec
+ %139:vreg_64 = nofpexcept V_FMA_F64_e64 0, %138, 0, 0, 0, %42, 0, 0, implicit $mode, implicit $exec
+ %140:vreg_64 = nofpexcept V_MUL_F64_e64 0, %139, 0, %45, 0, 0, implicit $mode, implicit $exec
+ %57.sub0:sreg_64 = COPY %48.sub0
+ %141:vreg_64 = nofpexcept V_FMA_F64_e64 0, %140, 0, %57, 0, %47, 0, 0, implicit $mode, implicit $exec
+ %142:vreg_64 = nofpexcept V_FMA_F64_e64 0, %141, 0, 0, 0, %49, 0, 0, implicit $mode, implicit $exec
+ %143:vreg_64 = nofpexcept V_FMA_F64_e64 0, %142, 0, 0, 0, %50, 0, 0, implicit $mode, implicit $exec
+ %144:vreg_64 = nofpexcept V_FMA_F64_e64 0, %143, 0, 0, 0, %51, 0, 0, implicit $mode, implicit $exec
+ %145:vreg_64 = nofpexcept V_FMA_F64_e64 0, %144, 0, 0, 0, %52, 0, 0, implicit $mode, implicit $exec
+ %146:vreg_64 = nofpexcept V_FMA_F64_e64 0, %145, 0, 0, 0, %53, 0, 0, implicit $mode, implicit $exec
+ %147:vreg_64 = nofpexcept V_FMA_F64_e64 0, %146, 0, 0, 0, %54, 0, 0, implicit $mode, implicit $exec
+ %148:vreg_64 = nofpexcept V_FMA_F64_e64 0, %147, 0, 0, 0, %55, 0, 0, implicit $mode, implicit $exec
+ %149:vreg_64 = nofpexcept V_FMA_F64_e64 0, %148, 0, 0, 0, %56, 0, 0, implicit $mode, implicit $exec
+ %28:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %28, 0, %149, 0, 0, implicit $mode, implicit $exec
+ %58:sreg_32 = V_CMP_LE_U32_e64 %8.sub0, %4, implicit $exec
+ %68:sreg_32 = S_MOV_B32 0
+ S_BRANCH %bb.9
+
+ bb.14:
+ S_CBRANCH_SCC1 %bb.17, implicit undef $scc
+ S_BRANCH %bb.15
+
+ bb.15:
+ %150:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %74, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %150, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.17, implicit killed $vcc
+ S_BRANCH %bb.16
+
+ bb.16:
+ %151:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %75, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %151, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.16, implicit killed $vcc
+ S_BRANCH %bb.17
+
+ bb.17:
+ %152:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %76, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %152, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.14, implicit killed $vcc
+ S_BRANCH %bb.18
+
+ bb.18:
+ $exec_lo = S_OR_B32 $exec_lo, %65, implicit-def $scc
+ S_ENDPGM 0
+
+ bb.19:
+ $exec_lo = S_OR_B32 $exec_lo, %59, implicit-def $scc
+ S_BRANCH %bb.2
+
+...