[llvm] 547e3cb - [AMDGPU] Improve liveness copying in si-optimize-exec-masking-pre-ra

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 17 01:34:45 PDT 2022


Author: Carl Ritson
Date: 2022-07-17T17:34:05+09:00
New Revision: 547e3cba7d166286c3c4ce477998a81ebbed6921

URL: https://github.com/llvm/llvm-project/commit/547e3cba7d166286c3c4ce477998a81ebbed6921
DIFF: https://github.com/llvm/llvm-project/commit/547e3cba7d166286c3c4ce477998a81ebbed6921.diff

LOG: [AMDGPU] Improve liveness copying in si-optimize-exec-masking-pre-ra

Further improve liveness copying for the CC register after the optimization
by mirroring live internal splits.
This fixes a bug in register allocation when CC register liveness
is extended across branches instead of being split.
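
For reference, this is roughly the sequence the pass folds (mirroring the
description in SIOptimizeExecMaskingPreRA.cpp; register names illustrative):

    %sel = V_CNDMASK_B32_e64 0, 1, %cc
    %cmp = V_CMP_NE_U32 1, %sel
    $vcc = S_AND_B64 $exec, %cmp
    =>
    $vcc = S_ANDN2_B64 $exec, %cc

After the fold, $vcc must remain live wherever %sel was live, which is why
the pass copies %sel's live segments onto the CC register.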

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D129557

Added: 
    llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 57dbad468de8..aed84437b890 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -184,6 +184,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
   if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
     return false;
 
+  // Cannot safely mirror live intervals with PHI nodes, so check for these
+  // before optimization.
+  SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
+  LiveInterval *SelLI = &LIS->getInterval(SelReg);
+  if (llvm::any_of(SelLI->vnis(),
+                    [](const VNInfo *VNI) {
+                      return VNI->isPHIDef();
+                    }))
+    return false;
+
   // TODO: Guard against implicit def operands?
   LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
                     << *And);
@@ -204,31 +214,34 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
 
   LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
 
-  SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
-  SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
-
-  LiveInterval *CmpLI =
-      CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
-  LiveInterval *SelLI =
-      SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
-
   // Update live intervals for CCReg before potentially removing CmpReg/SelReg,
   // and their associated liveness information.
+  SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
   if (CCReg.isVirtual()) {
-    // Note: this ignores that SelLI might have multiple internal values
-    // or splits and simply extends the live range to cover all cases
-    // where the result of the v_cndmask_b32 was live (e.g. loops).
-    // This could yield worse register allocation in rare edge cases.
-    SlotIndex EndIdx = AndIdx.getRegSlot();
-    if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock())
-      EndIdx = SelLI->endIndex();
+    // Apply live ranges from SelLI to CCReg potentially matching splits
+    // and extending to loop boundaries.
+
+    auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) {
+      // Copy live ranges from SelLI, adjusting start and end as required
+      auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
+      assert(DefSegment != SelLI->end() &&
+             "No live interval segment covering definition?");
+      for (auto I = DefSegment; I != SelLI->end(); ++I) {
+        SlotIndex Start = I->start < SelIdx.getRegSlot() ?
+                          SelIdx.getRegSlot() : I->start;
+        SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
+                        I->end : AndIdx.getRegSlot();
+        Dst.addSegment(LiveRange::Segment(Start, End, VNI));
+      }
+      // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
+      if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
+        Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
+    };
 
     LiveInterval &CCLI = LIS->getInterval(CCReg);
     auto CCQ = CCLI.Query(SelIdx.getRegSlot());
-    if (CCQ.valueIn()) {
-      CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
-                                         EndIdx, CCQ.valueIn()));
-    }
+    if (CCQ.valueIn())
+      applyLiveRanges(CCLI, CCQ.valueIn());
 
     if (CC->getSubReg()) {
       LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
@@ -237,10 +250,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
           Allocator, Mask,
           [=](LiveInterval::SubRange &SR) {
             auto CCQS = SR.Query(SelIdx.getRegSlot());
-            if (CCQS.valueIn()) {
-              SR.addSegment(LiveRange::Segment(
-                  SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
-            }
+            if (CCQS.valueIn())
+              applyLiveRanges(SR, CCQS.valueIn());
           },
           *LIS->getSlotIndexes(), *TRI);
       CCLI.removeEmptySubRanges();
@@ -253,7 +264,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
 
   // Try to remove the compare. Its value should not be used between the cmp
   // and s_and_b64 if it is VCC, and must be unused for any other register.
-  if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
+  LiveInterval *CmpLI = CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+  if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
       (CmpReg == Register(CondReg) &&
        std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
                     [&](const MachineInstr &MI) {
@@ -266,18 +278,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
     Cmp->eraseFromParent();
 
     // Try to remove v_cndmask_b32.
-    if (SelLI) {
-      // Kill status must be checked before shrinking the live range.
-      bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
-      LIS->shrinkToUses(SelLI);
-      bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
-      if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
-        LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
-
-        LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
-        LIS->RemoveMachineInstrFromMaps(*Sel);
-        Sel->eraseFromParent();
-      }
+    // Kill status must be checked before shrinking the live range.
+    bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+    LIS->shrinkToUses(SelLI);
+    bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+    if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
+      LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+
+      LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
+      LIS->RemoveMachineInstrFromMaps(*Sel);
+      Sel->eraseFromParent();
     }
   }
 

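The per-segment rule applied by applyLiveRanges above can be pictured with
plain integers standing in for SlotIndexes. Below is a minimal standalone
sketch (Seg, mirrorSegments and the index values are invented for
illustration, not LLVM API code); the extra CmpIdx-to-AndIdx segment the
patch adds when the compare kills the select is omitted for brevity:

    #include <cstdio>
    #include <vector>

    // Stand-ins for LiveRange::Segment and SlotIndex (plain ints here).
    struct Seg { int Start, End; bool EndIsBlock; };

    // Mirror the segments of the v_cndmask_b32 result (SelLI) onto the CC
    // register: clamp the first segment's start to the select (SelIdx) and
    // clamp ends to the s_andn2 (AndIdx), except where a segment stops early
    // or runs to a block boundary (e.g. live across a loop back-edge).
    static std::vector<Seg> mirrorSegments(const std::vector<Seg> &SelLI,
                                           int SelIdx, int AndIdx) {
      std::vector<Seg> Dst;
      for (const Seg &S : SelLI) {
        if (S.End <= SelIdx)
          continue; // before the def segment; the real code starts there
        int Start = S.Start < SelIdx ? SelIdx : S.Start;
        int End = (S.End < AndIdx || S.EndIsBlock) ? S.End : AndIdx;
        Dst.push_back({Start, End, S.EndIsBlock});
      }
      return Dst;
    }

    int main() {
      // Select defined at 10; its range was split: [10,40) in one block and
      // [60,96) in a loop, where 96 is a block boundary. The s_and is at 80.
      std::vector<Seg> SelLI = {{10, 40, false}, {60, 96, true}};
      for (const Seg &S : mirrorSegments(SelLI, 10, 80))
        std::printf("[%d,%d)%s\n", S.Start, S.End,
                    S.EndIsBlock ? " (block end)" : "");
      // Prints [10,40) and [60,96): the internal split survives, where the
      // old code produced one over-approximated segment [10,96).
      return 0;
    }
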
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir
new file mode 100644
index 000000000000..703ad549d6cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir
@@ -0,0 +1,293 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -start-before=machine-scheduler -stop-after=virtregrewriter,0 -o - %s | FileCheck %s
+
+---
+# If optimize-exec-mask-pre-ra over-approximates live intervals (i.e. does not
+# replicate splits), then this triggers a register allocation failure.
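+#
+# Illustrative mechanism (slot indexes invented): when the v_cndmask_b32
+# result is live in two split segments, the old code gave the CC register one
+# segment spanning both, overlapping other intervals that also want $vcc_lo
+# (e.g. %0 and %1 below), which can leave the allocator with no valid
+# assignment.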
+
+# CHECK-LABEL: name: test
+
+name:            test
+alignment:       1
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_32, preferred-register: '$vcc_lo' }
+  - { id: 1, class: sreg_32, preferred-register: '$vcc_lo' }
+  - { id: 2, class: sreg_32_xm0_xexec, preferred-register: '$vcc_lo' }
+liveins:
+  - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr100_sgpr101_sgpr102_sgpr103'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0.entry:
+    liveins: $vgpr0, $sgpr4_sgpr5
+
+    %3:sgpr_64 = COPY $sgpr4_sgpr5
+    %4:vgpr_32 = COPY $vgpr0
+    %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 16, 0
+    S_BITCMP1_B32 %5, 0, implicit-def $scc
+    %6:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    %7:sreg_32 = S_MOV_B32 -1
+    %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 136, 0
+    S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 8, 0
+    %10:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 24, 0
+    %11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 40, 0
+    %12:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 48, 0
+    %13:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 64, 0
+    S_BITCMP1_B32 %11, 0, implicit-def $scc
+    %14:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    S_BITCMP1_B32 %13, 0, implicit-def $scc
+    %15:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    S_BITCMP1_B32 %13, 8, implicit-def $scc
+    %2:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    %16:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 72, 0
+    %17:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 80, 0
+    S_BITCMP1_B32 %17, 0, implicit-def $scc
+    %18:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    S_BITCMP1_B32 %17, 8, implicit-def $scc
+    %19:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    %20:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 88, 0
+    %21:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 104, 0
+    %22:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 120, 0
+    S_BITCMP1_B32 %22, 0, implicit-def $scc
+    %23:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    %24:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 128, 0
+    %25:sreg_64 = S_MOV_B64 0
+    %26:sreg_64_xexec = S_LOAD_DWORDX2_IMM %25, 0, 0
+    %27:sreg_64 = S_MOV_B64_IMM_PSEUDO 4652218415073722368
+    %28:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    %29:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %14, implicit $exec
+    %30:sreg_64 = S_MOV_B64_IMM_PSEUDO 4358002977218854975
+    undef %31.sub1:sreg_64 = S_MOV_B32 -1064252416
+    %32:sreg_32 = S_OR_B32 %19, %18, implicit-def dead $scc
+    undef %33.sub1:sreg_64 = S_MOV_B32 2146435072
+    %34:sreg_64 = S_MOV_B64_IMM_PSEUDO 4592094252754343337
+    %35:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593089322246397463
+    %36:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593150332132823898
+    %37:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593971714784152002
+    %38:sreg_64 = S_MOV_B64_IMM_PSEUDO 4594710915293070409
+    %39:sreg_64 = S_MOV_B64_IMM_PSEUDO 4595718710613720112
+    %40:sreg_64 = S_MOV_B64_IMM_PSEUDO 4597174419628462798
+    %41:sreg_64 = S_MOV_B64_IMM_PSEUDO 4598818590920614106
+    %42:sreg_64 = S_MOV_B64_IMM_PSEUDO 4600877379321698716
+    %43:sreg_64 = S_MOV_B64_IMM_PSEUDO 4604180019048437077
+    undef %44.sub1:sreg_64 = S_MOV_B32 -1075489451
+    %45:sreg_64 = S_MOV_B64_IMM_PSEUDO 4609176140021203710
+    undef %46.sub1:sreg_64 = S_MOV_B32 -1132807010
+    %47:sreg_64 = S_MOV_B64_IMM_PSEUDO 4508818957471820556
+    %48:sreg_64 = S_MOV_B64_IMM_PSEUDO 4493147761815702327
+    %49:sreg_64 = S_MOV_B64_IMM_PSEUDO 4523617260404727396
+    %50:sreg_64 = S_MOV_B64_IMM_PSEUDO 4537941333260232368
+    %51:sreg_64 = S_MOV_B64_IMM_PSEUDO 4551452160460988270
+    %52:sreg_64 = S_MOV_B64_IMM_PSEUDO 4564047942395279280
+    %53:sreg_64 = S_MOV_B64_IMM_PSEUDO 4575957461383652130
+    %54:sreg_64 = S_MOV_B64_IMM_PSEUDO 4586165620538933921
+    %55:sreg_64 = S_MOV_B64_IMM_PSEUDO 4595172819793696017
+    %56:sreg_64 = S_MOV_B64_IMM_PSEUDO 4602678819172646923
+    undef %57.sub1:sreg_64 = S_MOV_B32 -1101341163
+    %7:sreg_32 = IMPLICIT_DEF
+    %58:sreg_32 = IMPLICIT_DEF
+    %59:sreg_32 = COPY %27.sub0
+    %60:vreg_64 = COPY %28
+    %61:vreg_64 = COPY %28
+    %62:vreg_64 = COPY %28
+    %63:vreg_64 = COPY %28
+    %64:vreg_64 = COPY %28
+    S_BRANCH %bb.3
+
+  bb.2:
+    %65:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+    %66:sreg_32 = S_AND_B32 %65, %7, implicit-def dead $scc
+    $exec_lo = S_MOV_B32_term %66
+    S_CBRANCH_EXECZ %bb.18, implicit $exec
+    S_BRANCH %bb.5
+
+  bb.3:
+    %67:sreg_32 = S_AND_B32 $exec_lo, %6, implicit-def dead $scc
+    $vcc_lo = COPY %67
+    %58:sreg_32 = S_OR_B32 %58, $exec_lo, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.7, implicit killed $vcc
+
+  bb.4:
+    %64:vreg_64 = IMPLICIT_DEF
+    %63:vreg_64 = IMPLICIT_DEF
+    %62:vreg_64 = IMPLICIT_DEF
+    %61:vreg_64 = IMPLICIT_DEF
+    %60:vreg_64 = IMPLICIT_DEF
+    %28:vreg_64 = IMPLICIT_DEF
+    %68:sreg_32 = S_MOV_B32 -1
+    S_BRANCH %bb.9
+
+  bb.5:
+    S_CBRANCH_SCC1 %bb.18, implicit undef $scc
+
+  bb.6:
+    %69:sreg_32_xm0_xexec = S_XOR_B32 %6, -1, implicit-def dead $scc
+    %70:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 96, 0
+    S_BITCMP1_B32 %70, 0, implicit-def $scc
+    %71:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    S_BITCMP1_B32 %8.sub1, 0, implicit-def $scc
+    %72:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    %73:sreg_32_xm0_xexec = S_XOR_B32 %72, -1, implicit-def dead $scc
+    %74:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %73, implicit $exec
+    %75:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %71, implicit $exec
+    %76:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %69, implicit $exec
+    S_BRANCH %bb.14
+
+  bb.7:
+    %77:vreg_64 = COPY %10.sub0_sub1
+    %78:vreg_64 = FLAT_LOAD_DWORDX2 %77, 0, 0, implicit $exec, implicit $flat_scr
+    %79:vreg_64 = COPY %10.sub2_sub3
+    %80:vreg_64 = FLAT_LOAD_DWORDX2 %79, 0, 0, implicit $exec, implicit $flat_scr
+    %81:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %29, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %81, implicit-def dead $scc
+    %82:sreg_64 = COPY %12.sub0_sub1
+    S_CBRANCH_VCCNZ %bb.10, implicit killed $vcc
+    S_BRANCH %bb.8
+
+  bb.8:
+    %82:sreg_64 = S_MOV_B64 0
+    S_BRANCH %bb.10
+
+  bb.9:
+    %83:sreg_32 = S_XOR_B32 %68, -1, implicit-def dead $scc
+    %84:sreg_32 = S_AND_B32 $exec_lo, %58, implicit-def $scc
+    %59:sreg_32 = S_OR_B32 %84, %59, implicit-def $scc
+    %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %85:sreg_32 = S_ANDN2_B32 %7, $exec_lo, implicit-def dead $scc
+    %86:sreg_32 = S_AND_B32 %83, $exec_lo, implicit-def dead $scc
+    %7:sreg_32 = S_OR_B32 %85, %86, implicit-def dead $scc
+    $exec_lo = S_ANDN2_B32_term $exec_lo, %59, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.3, implicit $exec
+    S_BRANCH %bb.19
+
+  bb.10:
+    %87:sreg_64_xexec = S_LOAD_DWORDX2_IMM %26, 16, 0
+    undef %88.sub1:sreg_64 = S_AND_B32 %87.sub1, 2147483647, implicit-def dead $scc
+    %89:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e64 2, %87, 0, 0, implicit $mode, implicit $exec
+    %90:vreg_64 = V_CVT_F64_I32_e32 %89, implicit $mode, implicit $exec
+    %91:vreg_64 = nofpexcept V_FMA_F64_e64 0, 0, 0, %30, 0, %87, 0, 0, implicit $mode, implicit $exec
+    %0:sreg_32 = nofpexcept V_CMP_LT_F64_e64 0, %27, 0, %91, 0, implicit $mode, implicit $exec
+    %31.sub0:sreg_64 = COPY %27.sub0
+    %1:sreg_32 = nofpexcept V_CMP_GT_F64_e64 0, %31, 0, %90, 0, implicit $mode, implicit $exec
+    S_CBRANCH_SCC0 %bb.12, implicit undef $scc
+
+  bb.11:
+    %92:sreg_32_xm0_xexec = S_OR_B32 %1, %0, implicit-def dead $scc
+    undef %93.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %21.sub3, 0, 0, %92, implicit $exec
+    %93.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %21.sub2, 0, 0, %92, implicit $exec
+    S_BRANCH %bb.13
+
+  bb.12:
+    %93:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+
+  bb.13:
+    %88.sub0:sreg_64 = COPY %87.sub0
+    %94:vgpr_32 = COPY %16.sub0
+    %95:vgpr_32 = V_CNDMASK_B32_e64 0, %82.sub0, 0, %94, %2, implicit $exec
+    %96:vgpr_32 = COPY %16.sub1
+    %97:vgpr_32 = V_CNDMASK_B32_e64 0, %82.sub1, 0, %96, %2, implicit $exec
+    %98:vgpr_32 = V_CNDMASK_B32_e64 0, %97, 0, 2146959360, %18, implicit $exec
+    dead %99:sreg_32 = S_AND_B32 %32, $exec_lo, implicit-def $scc
+    %100:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
+    undef %101.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %95, 0, 0, %100, implicit $exec
+    %101.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %98, 0, 0, %19, implicit $exec
+    %64:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %64, 0, %101, 0, 0, implicit $mode, implicit $exec
+    %63:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %63, 0, %21.sub0_sub1, 0, 0, implicit $mode, implicit $exec
+    %62:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %62, 0, %12.sub2_sub3, 0, 0, implicit $mode, implicit $exec
+    %33.sub0:sreg_64 = COPY %27.sub0
+    %102:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, %33, 0, %88, 0, implicit $mode, implicit $exec
+    %103:sreg_32 = nofpexcept V_CMP_EQ_F64_e64 0, 0, 0, %87, 0, implicit $mode, implicit $exec
+    %104:sreg_32 = S_XOR_B32 %15, %103, implicit-def dead $scc
+    dead %105:sreg_32 = S_AND_B32 %104, $exec_lo, implicit-def $scc
+    %106:sgpr_32 = S_CSELECT_B32 0, 2146435072, implicit killed $scc
+    %107:vgpr_32 = V_CNDMASK_B32_e64 0, %93.sub1, 0, %106, %102, implicit $exec
+    undef %108.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %107, 0, 0, %23, implicit $exec
+    %109:sreg_32_xm0_xexec = S_OR_B32 %23, %102, implicit-def dead $scc
+    %108.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %93.sub0, 0, 0, %109, implicit $exec
+    %61:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %61, 0, %108, 0, 0, implicit $mode, implicit $exec
+    %110:vreg_64 = nofpexcept V_FMA_F64_e64 0, %78, 0, %35, 0, %34, 0, 0, implicit $mode, implicit $exec
+    %111:vreg_64 = nofpexcept V_FMA_F64_e64 0, %110, 0, 0, 0, %36, 0, 0, implicit $mode, implicit $exec
+    %112:vreg_64 = nofpexcept V_FMA_F64_e64 0, %111, 0, 0, 0, %37, 0, 0, implicit $mode, implicit $exec
+    %113:vreg_64 = nofpexcept V_FMA_F64_e64 0, %112, 0, 0, 0, %38, 0, 0, implicit $mode, implicit $exec
+    %114:vreg_64 = nofpexcept V_FMA_F64_e64 0, %113, 0, 0, 0, %39, 0, 0, implicit $mode, implicit $exec
+    %115:vreg_64 = nofpexcept V_FMA_F64_e64 0, %114, 0, 0, 0, %40, 0, 0, implicit $mode, implicit $exec
+    %116:vreg_64 = nofpexcept V_FMA_F64_e64 0, %115, 0, 0, 0, %41, 0, 0, implicit $mode, implicit $exec
+    %117:vreg_64 = nofpexcept V_FMA_F64_e64 0, %116, 0, 0, 0, %42, 0, 0, implicit $mode, implicit $exec
+    %118:vreg_64 = nofpexcept V_ADD_F64_e64 0, %117, 0, %43, 0, 0, implicit $mode, implicit $exec
+    %44.sub0:sreg_64 = COPY %43.sub0
+    %119:vreg_64 = nofpexcept V_ADD_F64_e64 0, %118, 0, %44, 0, 0, implicit $mode, implicit $exec
+    %120:vreg_64 = nofpexcept V_ADD_F64_e64 0, %24, 0, %119, 0, 0, implicit $mode, implicit $exec
+    %121:vreg_64 = nofpexcept V_MUL_F64_e64 0, %9, 0, %120, 0, 0, implicit $mode, implicit $exec
+    %122:vreg_64 = nofpexcept V_MUL_F64_e64 0, %121, 0, %45, 0, 0, implicit $mode, implicit $exec
+    %46.sub0:sreg_64 = COPY %30.sub0
+    %123:vreg_64 = nofpexcept V_FMA_F64_e64 0, %122, 0, %46, 0, %20, 0, 0, implicit $mode, implicit $exec
+    %124:vreg_64 = nofpexcept V_FMA_F64_e64 0, %123, 0, %48, 0, %47, 0, 0, implicit $mode, implicit $exec
+    %125:vreg_64 = nofpexcept V_FMA_F64_e64 0, %124, 0, 0, 0, %49, 0, 0, implicit $mode, implicit $exec
+    %126:vreg_64 = nofpexcept V_FMA_F64_e64 0, %125, 0, 0, 0, %50, 0, 0, implicit $mode, implicit $exec
+    %127:vreg_64 = nofpexcept V_FMA_F64_e64 0, %126, 0, 0, 0, %51, 0, 0, implicit $mode, implicit $exec
+    %128:vreg_64 = nofpexcept V_FMA_F64_e64 0, %127, 0, 0, 0, %52, 0, 0, implicit $mode, implicit $exec
+    %129:vreg_64 = nofpexcept V_FMA_F64_e64 0, %128, 0, 0, 0, %53, 0, 0, implicit $mode, implicit $exec
+    %130:vreg_64 = nofpexcept V_FMA_F64_e64 0, %129, 0, 0, 0, %54, 0, 0, implicit $mode, implicit $exec
+    %131:vreg_64 = nofpexcept V_FMA_F64_e64 0, %130, 0, 0, 0, %55, 0, 0, implicit $mode, implicit $exec
+    %132:vreg_64 = nofpexcept V_FMA_F64_e64 0, %131, 0, 0, 0, %56, 0, 0, implicit $mode, implicit $exec
+    %60:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %60, 0, %132, 0, 0, implicit $mode, implicit $exec
+    %133:vreg_64 = nofpexcept V_FMA_F64_e64 0, %80, 0, 0, 0, %36, 0, 0, implicit $mode, implicit $exec
+    %134:vreg_64 = nofpexcept V_FMA_F64_e64 0, %133, 0, 0, 0, %37, 0, 0, implicit $mode, implicit $exec
+    %135:vreg_64 = nofpexcept V_FMA_F64_e64 0, %134, 0, 0, 0, %38, 0, 0, implicit $mode, implicit $exec
+    %136:vreg_64 = nofpexcept V_FMA_F64_e64 0, %135, 0, 0, 0, %39, 0, 0, implicit $mode, implicit $exec
+    %137:vreg_64 = nofpexcept V_FMA_F64_e64 0, %136, 0, 0, 0, %40, 0, 0, implicit $mode, implicit $exec
+    %138:vreg_64 = nofpexcept V_FMA_F64_e64 0, %137, 0, 0, 0, %41, 0, 0, implicit $mode, implicit $exec
+    %139:vreg_64 = nofpexcept V_FMA_F64_e64 0, %138, 0, 0, 0, %42, 0, 0, implicit $mode, implicit $exec
+    %140:vreg_64 = nofpexcept V_MUL_F64_e64 0, %139, 0, %45, 0, 0, implicit $mode, implicit $exec
+    %57.sub0:sreg_64 = COPY %48.sub0
+    %141:vreg_64 = nofpexcept V_FMA_F64_e64 0, %140, 0, %57, 0, %47, 0, 0, implicit $mode, implicit $exec
+    %142:vreg_64 = nofpexcept V_FMA_F64_e64 0, %141, 0, 0, 0, %49, 0, 0, implicit $mode, implicit $exec
+    %143:vreg_64 = nofpexcept V_FMA_F64_e64 0, %142, 0, 0, 0, %50, 0, 0, implicit $mode, implicit $exec
+    %144:vreg_64 = nofpexcept V_FMA_F64_e64 0, %143, 0, 0, 0, %51, 0, 0, implicit $mode, implicit $exec
+    %145:vreg_64 = nofpexcept V_FMA_F64_e64 0, %144, 0, 0, 0, %52, 0, 0, implicit $mode, implicit $exec
+    %146:vreg_64 = nofpexcept V_FMA_F64_e64 0, %145, 0, 0, 0, %53, 0, 0, implicit $mode, implicit $exec
+    %147:vreg_64 = nofpexcept V_FMA_F64_e64 0, %146, 0, 0, 0, %54, 0, 0, implicit $mode, implicit $exec
+    %148:vreg_64 = nofpexcept V_FMA_F64_e64 0, %147, 0, 0, 0, %55, 0, 0, implicit $mode, implicit $exec
+    %149:vreg_64 = nofpexcept V_FMA_F64_e64 0, %148, 0, 0, 0, %56, 0, 0, implicit $mode, implicit $exec
+    %28:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %28, 0, %149, 0, 0, implicit $mode, implicit $exec
+    %58:sreg_32 = V_CMP_LE_U32_e64 %8.sub0, %4, implicit $exec
+    %68:sreg_32 = S_MOV_B32 0
+    S_BRANCH %bb.9
+
+  bb.14:
+    S_CBRANCH_SCC1 %bb.17, implicit undef $scc
+    S_BRANCH %bb.15
+
+  bb.15:
+    %150:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %74, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %150, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.17, implicit killed $vcc
+    S_BRANCH %bb.16
+
+  bb.16:
+    %151:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %75, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %151, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.16, implicit killed $vcc
+    S_BRANCH %bb.17
+
+  bb.17:
+    %152:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %76, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %152, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.14, implicit killed $vcc
+    S_BRANCH %bb.18
+
+  bb.18:
+    $exec_lo = S_OR_B32 $exec_lo, %65, implicit-def $scc
+    S_ENDPGM 0
+
+  bb.19:
+    $exec_lo = S_OR_B32 $exec_lo, %59, implicit-def $scc
+    S_BRANCH %bb.2
+
+...

