[llvm] [AMDGPU] Don't optimize agpr phis if the operand doesn't have subreg use (PR #91267)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue May 7 07:52:03 PDT 2024
================
@@ -0,0 +1,58 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck %s
+
+# CHECK-NOT: V_ACCVGPR_READ_B32_e64
+
+---
+name: skip_optimize_agpr_phi_without_subreg_use
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+body: |
+ bb.0:
+ successors: %bb.1(0x80000000); %bb.1(100.00%)
+
+ %10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %11:sgpr_32 = S_MOV_B32 0
+ %12:sgpr_128 = REG_SEQUENCE %11:sgpr_32, %subreg.sub0, %11:sgpr_32, %subreg.sub1, %11:sgpr_32, %subreg.sub2, %11:sgpr_32, %subreg.sub3
+ %0:vreg_128 = COPY %12:sgpr_128
+ %9:sreg_64 = S_MOV_B64 0
+ %38:areg_128 = COPY %0:vreg_128, implicit $exec
+ %27:sreg_32 = S_MOV_B32 1
+
+ bb.1:
+ successors: %bb.2(0x04000000), %bb.1(0x7c000000); %bb.2(3.12%), %bb.1(96.88%)
+
+ %2:sreg_64 = PHI %9:sreg_64, %bb.0, %7:sreg_64, %bb.1
+ %3:areg_128 = PHI %38:areg_128, %bb.0, %39:areg_128, %bb.1
+ %4:areg_128 = PHI %38:areg_128, %bb.0, %41:areg_128, %bb.1
+ %14:areg_128 = V_MFMA_F32_16X16X4F32_e64 %10:vgpr_32, %10:vgpr_32, %4:areg_128, 0, 0, 0, implicit $mode, implicit $exec
+ %16:vgpr_32 = COPY %14.sub3:areg_128
+ %17:vgpr_32 = COPY %14.sub2:areg_128
+ %18:vgpr_32 = COPY %14.sub1:areg_128
+ %19:vgpr_32 = COPY %14.sub0:areg_128
+ %20:areg_128 = V_MFMA_F32_16X16X4F32_e64 %10:vgpr_32, %10:vgpr_32, %3:areg_128, 0, 0, 0, implicit $mode, implicit $exec
+ %22:vgpr_32 = COPY %20.sub3:areg_128
+ %23:vgpr_32 = COPY %20.sub2:areg_128
+ %24:vgpr_32 = COPY %20.sub1:areg_128
+ %25:vgpr_32 = COPY %20.sub0:areg_128
+ %26:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+ %28:sreg_64 = V_CMP_LT_I32_e64 killed %26:vgpr_32, %27:sreg_32, implicit $exec
+ %7:sreg_64 = SI_IF_BREAK killed %28:sreg_64, %2:sreg_64, implicit-def dead $scc
+ %39:areg_128 = COPY %20:areg_128, implicit $exec
+ %41:areg_128 = COPY %14:areg_128, implicit $exec
+ SI_LOOP %7:sreg_64, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
----------------
arsenm wrote:
Can you reduce this test down so that it avoids using any of the control flow pseudos (SI_IF_BREAK, SI_LOOP)?
https://github.com/llvm/llvm-project/pull/91267
More information about the llvm-commits mailing list