[llvm] [AMDGPU] Don't optimize agpr phis if the operand doesn't have subreg use (PR #91267)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue May 7 07:52:03 PDT 2024


================
@@ -0,0 +1,58 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck %s
+
+# CHECK-NOT: V_ACCVGPR_READ_B32_e64
+
+---
+name:            skip_optimize_agpr_phi_without_subreg_use
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+body:             |
+  bb.0:
+    successors: %bb.1(0x80000000); %bb.1(100.00%)
+
+    %10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %11:sgpr_32 = S_MOV_B32 0
+    %12:sgpr_128 = REG_SEQUENCE %11:sgpr_32, %subreg.sub0, %11:sgpr_32, %subreg.sub1, %11:sgpr_32, %subreg.sub2, %11:sgpr_32, %subreg.sub3
+    %0:vreg_128 = COPY %12:sgpr_128
+    %9:sreg_64 = S_MOV_B64 0
+    %38:areg_128 = COPY %0:vreg_128, implicit $exec
+    %27:sreg_32 = S_MOV_B32 1
+
+  bb.1:
+    successors: %bb.2(0x04000000), %bb.1(0x7c000000); %bb.2(3.12%), %bb.1(96.88%)
+
+    %2:sreg_64 = PHI %9:sreg_64, %bb.0, %7:sreg_64, %bb.1
+    %3:areg_128 = PHI %38:areg_128, %bb.0, %39:areg_128, %bb.1
+    %4:areg_128 = PHI %38:areg_128, %bb.0, %41:areg_128, %bb.1
+    %14:areg_128 = V_MFMA_F32_16X16X4F32_e64 %10:vgpr_32, %10:vgpr_32, %4:areg_128, 0, 0, 0, implicit $mode, implicit $exec
+    %16:vgpr_32 = COPY %14.sub3:areg_128
+    %17:vgpr_32 = COPY %14.sub2:areg_128
+    %18:vgpr_32 = COPY %14.sub1:areg_128
+    %19:vgpr_32 = COPY %14.sub0:areg_128
+    %20:areg_128 = V_MFMA_F32_16X16X4F32_e64 %10:vgpr_32, %10:vgpr_32, %3:areg_128, 0, 0, 0, implicit $mode, implicit $exec
+    %22:vgpr_32 = COPY %20.sub3:areg_128
+    %23:vgpr_32 = COPY %20.sub2:areg_128
+    %24:vgpr_32 = COPY %20.sub1:areg_128
+    %25:vgpr_32 = COPY %20.sub0:areg_128
+    %26:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+    %28:sreg_64 = V_CMP_LT_I32_e64 killed %26:vgpr_32, %27:sreg_32, implicit $exec
+    %7:sreg_64 = SI_IF_BREAK killed %28:sreg_64, %2:sreg_64, implicit-def dead $scc
+    %39:areg_128 = COPY %20:areg_128, implicit $exec
+    %41:areg_128 = COPY %14:areg_128, implicit $exec
+    SI_LOOP %7:sreg_64, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
----------------
arsenm wrote:

Can you reduce this test down so that it avoids using any of the control flow pseudos (SI_IF_BREAK and SI_LOOP)?

https://github.com/llvm/llvm-project/pull/91267


More information about the llvm-commits mailing list