[llvm] [AMDGPU] Don't optimize agpr phis if the operand doesn't have subreg use (PR #91267)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Tue May 7 10:05:39 PDT 2024


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/91267

>From fce0ec692c493298f9e8b1c5cb493486ca210cc0 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 7 May 2024 13:05:27 -0400
Subject: [PATCH] [AMDGPU] Don't optimize agpr phis if the operand doesn't
 have a subreg use

If the operand doesn't have any subreg use, the optimization can generate a
`V_ACCVGPR_READ_B32_e64` with the wrong register class, as in the following
case:

%46:vreg_128 = V_ACCVGPR_READ_B32_e64 %38:areg_128, implicit $exec
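For illustration only (not part of the patch), here is a minimal standalone
sketch of the skip-guard idea: PHI operands are grouped by (register, subreg)
pair, and operands without a subreg index are left untouched. The struct,
names, and register numbers below are hypothetical stand-ins, not the actual
SIFoldOperands data structures.

#include <iostream>
#include <map>
#include <utility>
#include <vector>

// Hypothetical stand-in for a PHI's register operand.
struct PhiOperand {
  unsigned Reg;    // virtual register id
  unsigned SubReg; // 0 means "no subreg", i.e. the full register is used
};

int main() {
  // Operands of an areg_128 PHI: the first uses the full 128-bit register
  // (SubReg == 0), the others use 32-bit subregisters.
  std::vector<PhiOperand> Operands = {{38, 0}, {38, 1}, {38, 2}};

  // Group operands by (Reg, SubReg), mirroring the RegToMO map in the patch,
  // but skip full-register operands: rewriting one of them through an AGPR
  // copy would need a V_ACCVGPR_READ_B32_e64 whose 32-bit result class
  // cannot match the 128-bit class of the operand.
  std::map<std::pair<unsigned, unsigned>, std::vector<const PhiOperand *>>
      RegToMO;
  for (const PhiOperand &MO : Operands) {
    if (!MO.SubReg)
      continue; // the guard this patch adds
    RegToMO[{MO.Reg, MO.SubReg}].push_back(&MO);
  }

  std::cout << "grouped operands: " << RegToMO.size() << "\n"; // prints 2
  return 0;
}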
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     |  2 +
 ...p-optimize-agpr-phi-without-subreg-use.mir | 64 +++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/skip-optimize-agpr-phi-without-subreg-use.mir

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index cb448aaafa4c..5c411a095587 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -2106,6 +2106,8 @@ bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
 
     for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {
       MachineOperand &PhiMO = MI.getOperand(K);
+      if (!PhiMO.getSubReg())
+        continue;
       RegToMO[{PhiMO.getReg(), PhiMO.getSubReg()}].push_back(&PhiMO);
     }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/skip-optimize-agpr-phi-without-subreg-use.mir b/llvm/test/CodeGen/AMDGPU/skip-optimize-agpr-phi-without-subreg-use.mir
new file mode 100644
index 000000000000..1a6d8dfce95b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/skip-optimize-agpr-phi-without-subreg-use.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck %s
+
+---
+name:            skip_optimize_agpr_phi_without_subreg_use
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+body:             |
+  ; CHECK-LABEL: name: skip_optimize_agpr_phi_without_subreg_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+  ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %8, %bb.1
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:areg_128 = PHI [[REG_SEQUENCE]], %bb.0, %10, %bb.1
+  ; CHECK-NEXT:   [[V_MFMA_F32_16X16X4F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_16X16X4F32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], [[PHI1]], 0, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[BUFFER_LOAD_DWORD_OFFSET]], 1, implicit $exec
+  ; CHECK-NEXT:   [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_LT_I32_e64_]], [[PHI]], implicit-def dead $scc
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:areg_128 = COPY [[V_MFMA_F32_16X16X4F32_e64_]], implicit $exec
+  ; CHECK-NEXT:   SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   SI_RETURN
+  bb.0:
+    successors: %bb.1(0x80000000)
+
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %1:sgpr_32 = S_MOV_B32 0
+    %2:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3
+    %3:vreg_128 = COPY %2
+    %4:sreg_64 = S_MOV_B64 0
+    %5:areg_128 = COPY %3, implicit $exec
+    %6:sreg_32 = S_MOV_B32 1
+
+  bb.1:
+    successors: %bb.1(0x80000000)
+
+    %7:sreg_64 = PHI %4, %bb.0, %8, %bb.1
+    %9:areg_128 = PHI %5, %bb.0, %10, %bb.1
+    %11:areg_128 = V_MFMA_F32_16X16X4F32_e64 %0, %0, %9, 0, 0, 0, implicit $mode, implicit $exec
+    %12:vgpr_32 = COPY %11.sub3
+    %13:vgpr_32 = COPY %11.sub2
+    %14:vgpr_32 = COPY %11.sub1
+    %15:vgpr_32 = COPY %11.sub0
+    %16:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+    %17:sreg_64 = V_CMP_LT_I32_e64 killed %16, %6, implicit $exec
+    %8:sreg_64 = SI_IF_BREAK killed %17, %7, implicit-def dead $scc
+    %10:areg_128 = COPY %11, implicit $exec
+    SI_LOOP %8, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    SI_RETURN
+
+...
