[llvm] 361e9ee - [AMDGPU] Correctly emit AGPR copies in tryFoldPhiAGPR

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 12 23:55:27 PDT 2023


Author: pvanhout
Date: 2023-07-13T08:55:22+02:00
New Revision: 361e9eec515a55cccd11728c253eb5aa4d7cf7fd

URL: https://github.com/llvm/llvm-project/commit/361e9eec515a55cccd11728c253eb5aa4d7cf7fd
DIFF: https://github.com/llvm/llvm-project/commit/361e9eec515a55cccd11728c253eb5aa4d7cf7fd.diff

LOG: [AMDGPU] Correctly emit AGPR copies in tryFoldPhiAGPR

- Don't create COPY instructions between PHI nodes.
- Don't create V_ACCVGPR_WRITE with operands that aren't AGPR_32

Solves SWDEV-410408

Reviewed By: #amdgpu, arsenm

Differential Revision: https://reviews.llvm.org/D155080

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a10a1b7710c757..9f1d6038f1b6d7 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1744,6 +1744,8 @@ bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
   if (!ARC)
     return false;
 
+  bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);
+
   // Rewrite the PHI's incoming values to ARC.
   LLVM_DEBUG(dbgs() << "Folding AGPR copies into: " << PHI);
   for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
@@ -1754,7 +1756,7 @@ bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
     MachineBasicBlock *InsertMBB = nullptr;
 
     // Look at the def of Reg, ignoring all copies.
-    bool UseAccVGPRWrite = false;
+    unsigned CopyOpc = AMDGPU::COPY;
     if (MachineInstr *Def = MRI->getVRegDef(Reg)) {
 
       // Look at pre-existing COPY instructions from ARC: Steal the operand. If
@@ -1772,21 +1774,21 @@ bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
         // GFX908 directly instead of a COPY. Otherwise, SIFoldOperand may try
         // to fold the sgpr -> vgpr -> agpr copy into a sgpr -> agpr copy which
         // is unlikely to be profitable.
+        //
+        // Note that V_ACCVGPR_WRITE is only used for AGPR_32.
         MachineOperand &CopyIn = Def->getOperand(1);
-        if (!ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
+        if (IsAGPR32 && !ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
             TRI->isSGPRReg(*MRI, CopyIn.getReg()))
-          UseAccVGPRWrite = true;
+          CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
       }
 
-      InsertPt = ++Def->getIterator();
       InsertMBB = Def->getParent();
+      InsertPt = InsertMBB->SkipPHIsLabelsAndDebug(++Def->getIterator());
     } else {
       InsertMBB = PHI.getOperand(MO.getOperandNo() + 1).getMBB();
       InsertPt = InsertMBB->getFirstTerminator();
     }
 
-    const unsigned CopyOpc =
-        UseAccVGPRWrite ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
     Register NewReg = MRI->createVirtualRegister(ARC);
     MachineInstr *MI = BuildMI(*InsertMBB, InsertPt, PHI.getDebugLoc(),
                                TII->get(CopyOpc), NewReg)

diff  --git a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
index 31fb5e3ddfb3b0..ac4e07422066d0 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
@@ -40,6 +40,7 @@ body: |
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_sgpr_init_multiuse
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -101,6 +102,90 @@ body: |
     S_ENDPGM 0
 ...
 
+---
+name: test_sgpr_init_multiuse_agprtuple
+tracksRegLiveness: true
+
+body: |
+  ; GFX908-LABEL: name: test_sgpr_init_multiuse_agprtuple
+  ; GFX908: bb.0:
+  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX908-NEXT:   liveins: $sgpr0_sgpr1, $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]]
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:areg_64_align2 = COPY [[COPY1]]
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[COPY1]]
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.1:
+  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX908-NEXT:   liveins: $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[PHI:%[0-9]+]]:areg_64_align2 = PHI [[COPY3]], %bb.0, %9.sub0_sub1, %bb.1
+  ; GFX908-NEXT:   [[PHI1:%[0-9]+]]:areg_64_align2 = PHI [[COPY2]], %bb.0, %9.sub2_sub3, %bb.1
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[PHI1]]
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[PHI]]
+  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY5]].sub0, %subreg.sub0, [[COPY5]].sub1, %subreg.sub1, [[COPY4]].sub0, %subreg.sub2, [[COPY4]].sub1, %subreg.sub3
+  ; GFX908-NEXT:   [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1073741824, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B64_e32_1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1065353216, implicit $exec
+  ; GFX908-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B64_e32_1]].sub0, [[V_MOV_B64_e32_]].sub1, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   S_ENDPGM 0
+  ;
+  ; GFX90A-LABEL: name: test_sgpr_init_multiuse_agprtuple
+  ; GFX90A: bb.0:
+  ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX90A-NEXT:   liveins: $sgpr0_sgpr1, $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]]
+  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:areg_64_align2 = COPY [[COPY1]]
+  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[COPY1]]
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.1:
+  ; GFX90A-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX90A-NEXT:   liveins: $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[PHI:%[0-9]+]]:areg_64_align2 = PHI [[COPY3]], %bb.0, %9.sub0_sub1, %bb.1
+  ; GFX90A-NEXT:   [[PHI1:%[0-9]+]]:areg_64_align2 = PHI [[COPY2]], %bb.0, %9.sub2_sub3, %bb.1
+  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[PHI1]]
+  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[PHI]]
+  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY5]].sub0, %subreg.sub0, [[COPY5]].sub1, %subreg.sub1, [[COPY4]].sub0, %subreg.sub2, [[COPY4]].sub1, %subreg.sub3
+  ; GFX90A-NEXT:   [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1073741824, implicit $exec
+  ; GFX90A-NEXT:   [[V_MOV_B64_e32_1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1065353216, implicit $exec
+  ; GFX90A-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B64_e32_1]].sub0, [[V_MOV_B64_e32_]].sub1, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.2:
+  ; GFX90A-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr0_sgpr1, $scc
+
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:vreg_64_align2 = COPY %0:sgpr_64
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    liveins: $scc
+
+    %2:vreg_64_align2 = PHI %1, %bb.0, %3, %bb.1
+    %4:vreg_64_align2 = PHI %1, %bb.0, %5, %bb.1
+    %6:areg_128_align2 = REG_SEQUENCE %2.sub0, %subreg.sub0, %2.sub1, %subreg.sub1, %4.sub0, %subreg.sub2, %4.sub1, %subreg.sub3
+    %7:vreg_64_align2 = V_MOV_B64_e32 1073741824, implicit $exec
+    %8:vreg_64_align2 = V_MOV_B64_e32 1065353216, implicit $exec
+    %9:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %8.sub0, %7.sub1, %6:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
+    %3:vreg_64_align2 = COPY %9.sub0_sub1:areg_128_align2
+    %5:vreg_64_align2 = COPY %9.sub2_sub3:areg_128_align2
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+  bb.2:
+
+    S_ENDPGM 0
+...
+
 ---
 name: test_sgpr_init_singleuse
 tracksRegLiveness: true
@@ -141,6 +226,7 @@ body: |
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_sgpr_init_singleuse
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -244,6 +330,7 @@ body: |
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_vgpr_init
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -343,6 +430,7 @@ body: |
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_use_vgpr_temp
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -445,6 +533,7 @@ body: |
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_vgpr_init_two_copies
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -504,3 +593,125 @@ body: |
   bb.2:
     S_ENDPGM 0
 ...
+
+---
+name: test_vgpr_init_skip_phis_insertpt
+tracksRegLiveness: true
+
+body: |
+  ; GFX908-LABEL: name: test_vgpr_init_skip_phis_insertpt
+  ; GFX908: bb.0:
+  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.1:
+  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX908-NEXT:   liveins: $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+  ; GFX908-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX908-NEXT:   liveins: $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY5]], %bb.1, %15.sub0, %bb.2
+  ; GFX908-NEXT:   [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.1, %15.sub1, %bb.2
+  ; GFX908-NEXT:   [[PHI4:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.1, %15.sub2, %bb.2
+  ; GFX908-NEXT:   [[PHI5:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.1, %15.sub3, %bb.2
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI5]]
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI4]]
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
+  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3
+  ; GFX908-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+  ; GFX908-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.3:
+  ; GFX908-NEXT:   S_ENDPGM 0
+  ;
+  ; GFX90A-LABEL: name: test_vgpr_init_skip_phis_insertpt
+  ; GFX90A: bb.0:
+  ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.1:
+  ; GFX90A-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX90A-NEXT:   liveins: $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+  ; GFX90A-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.2:
+  ; GFX90A-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX90A-NEXT:   liveins: $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY5]], %bb.1, %15.sub0, %bb.2
+  ; GFX90A-NEXT:   [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.1, %15.sub1, %bb.2
+  ; GFX90A-NEXT:   [[PHI4:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.1, %15.sub2, %bb.2
+  ; GFX90A-NEXT:   [[PHI5:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.1, %15.sub3, %bb.2
+  ; GFX90A-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI5]]
+  ; GFX90A-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI4]]
+  ; GFX90A-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
+  ; GFX90A-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
+  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3
+  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
+  ; GFX90A-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+  ; GFX90A-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.3:
+  ; GFX90A-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $scc
+    successors: %bb.1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr0
+
+  bb.1:
+    liveins: $scc
+    successors: %bb.1, %bb.2
+
+    %6:vgpr_32 = PHI %0, %bb.0, %1, %bb.1
+    %7:vgpr_32 = PHI %0, %bb.0, %1, %bb.1
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+  bb.2:
+    liveins: $scc
+    successors: %bb.2, %bb.3
+    %8:vgpr_32 = PHI %6, %bb.1, %16, %bb.2
+    %9:vgpr_32 = PHI %6, %bb.1, %17, %bb.2
+    %10:vgpr_32 = PHI %6, %bb.1, %18, %bb.2
+    %11:vgpr_32 = PHI %6, %bb.1, %19, %bb.2
+    %12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3
+    %13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
+    %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+    %15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
+    %16:vgpr_32 = COPY %15.sub0
+    %17:vgpr_32 = COPY %15.sub1
+    %18:vgpr_32 = COPY %15.sub2
+    %19:vgpr_32 = COPY %15.sub3
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+
+  bb.3:
+    S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list