[llvm] 597f93d - AMDGPU: Check if immediate is legal for av_mov_b32_imm_pseudo (#160819)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 16:24:24 PDT 2025
Author: Matt Arsenault
Date: 2025-09-27T08:24:20+09:00
New Revision: 597f93d36b035faeb63f4ba0d61a8b8e25eddaab
URL: https://github.com/llvm/llvm-project/commit/597f93d36b035faeb63f4ba0d61a8b8e25eddaab
DIFF: https://github.com/llvm/llvm-project/commit/597f93d36b035faeb63f4ba0d61a8b8e25eddaab.diff
LOG: AMDGPU: Check if immediate is legal for av_mov_b32_imm_pseudo (#160819)
This is primarily to avoid folding a frame index materialized
into an SGPR into the pseudo; this would end up looking like:
  %sreg = s_mov_b32 %stack.0
  %av_32 = av_mov_b32_imm_pseudo %sreg
Which is not useful.
Match the check used for the b64 case. The check is limited to the
pseudo to avoid a regression from gfx908's special case: that target
expects the fold to proceed here with v_accvgpr_write_b32 for illegal
immediates, staying in the intermediate state with an SGPR input.
This avoids regressions in a future patch.
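For illustration, a rough sketch of the intended behavior after this
change (assuming the usual -16..64 integer inline-constant range; the
authoritative CHECK lines are in the tests below): a legal inline
constant still folds into the pseudo, while a literal or frame index
now leaves the COPY alone instead of producing the pseudo with an
SGPR input:

  ; inline constant: COPY is rewritten and the immediate folds
  %0:sreg_32 = S_MOV_B32 64
  %1:av_32 = COPY %0           ; -> AV_MOV_B32_IMM_PSEUDO 64, implicit $exec

  ; literal or frame index: not a legal immediate for the pseudo
  %0:sreg_32 = S_MOV_B32 999   ; likewise S_MOV_B32 %stack.0
  %1:av_32 = COPY %0           ; -> unchanged COPY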
Added:
llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 51c56ecea2c96..fed37788802b9 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1313,6 +1313,15 @@ void SIFoldOperandsImpl::foldOperand(
if (MovSrcRC) {
if (UseSubReg)
MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+
+ // FIXME: We should be able to directly check immediate operand legality
+ // for all cases, but gfx908 hacks break.
+ if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
+ (!OpToFold.isImm() ||
+ !TII->isImmOperandLegal(MovDesc, SrcIdx,
+ *OpToFold.getEffectiveImmVal())))
+ break;
+
if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
break;
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
index 73cdcddbef135..a3b2191695734 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
@@ -209,8 +209,8 @@ body: |
bb.0:
; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_av_32
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
- ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:sreg_32 = S_MOV_B32 65, implicit $exec
%1:av_32 = COPY %0
S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index dfcf9a1f5c5ae..bec188e4e8378 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -240,8 +240,8 @@ body: |
bb.0:
; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
- ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
- ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+ ; GCN-NEXT: $agpr0 = COPY [[COPY]]
; GCN-NEXT: S_ENDPGM 0
%0:sreg_32 = S_MOV_B32 999
%1:av_32 = COPY %0
@@ -257,8 +257,8 @@ body: |
bb.0:
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
- ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[V_MOV_B32_e32_]], implicit $exec
- ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+ ; GCN-NEXT: $agpr0 = COPY [[COPY]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
%1:av_32 = COPY %0
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
new file mode 100644
index 0000000000000..32a209608a4d0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+ localFrameSize: 16384
+stack:
+ - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+ ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+ ; CHECK-NEXT: SI_RETURN implicit [[AV_MOV_]]
+ %0:sreg_32 = S_MOV_B32 %stack.0
+ %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+ SI_RETURN implicit %1
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+ localFrameSize: 16384
+stack:
+ - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+ ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:sreg_32 = S_MOV_B32 %stack.0
+ %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+ %2:vgpr_32 = COPY %1, implicit $exec
+ $vgpr0 = COPY %2
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
+ ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:sreg_32 = S_MOV_B32 1234
+ %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+ %2:vgpr_32 = COPY %1, implicit $exec
+ $vgpr0 = COPY %2
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
+ ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:sreg_32 = S_MOV_B32 8
+ %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+ %2:vgpr_32 = COPY %1, implicit $exec
+ $vgpr0 = COPY %2
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_regression_0
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+ localFrameSize: 16384
+stack:
+ - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_frame_index_av_regression_0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:sreg_32 = S_MOV_B32 %stack.0
+ %1:av_32 = COPY %0
+ %2:vgpr_32 = COPY %1, implicit $exec
+ $vgpr0 = COPY %2
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_regression_1
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+ localFrameSize: 16384
+stack:
+ - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_frame_index_av_regression_1
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:sreg_32 = S_MOV_B32 %stack.0
+ %1:sreg_32 = S_MOV_B32 killed %0
+ %2:sreg_64 = S_MOV_B64 0
+ %3:av_32 = COPY %1
+ %4:vgpr_32 = COPY %3, implicit $exec
+ $vgpr0 = COPY %4
+ SI_RETURN implicit $vgpr0
+
+...
+