[llvm-branch-commits] [llvm] 51a92a4 - Revert "[AMDGPU] Constrain register class during COPY elimination based on th…"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Feb 18 01:18:12 PST 2026
Author: Matt Arsenault
Date: 2026-02-18T10:18:09+01:00
New Revision: 51a92a4a62ea53fde782aeda19d38724f0a4c61b
URL: https://github.com/llvm/llvm-project/commit/51a92a4a62ea53fde782aeda19d38724f0a4c61b
DIFF: https://github.com/llvm/llvm-project/commit/51a92a4a62ea53fde782aeda19d38724f0a4c61b.diff
LOG: Revert "[AMDGPU] Constrain register class during COPY elimination based on th…"
This reverts commit e0b3e82e98fedc08c9351627f528f5cdbe58b54e.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Removed:
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wmma-scale-lo256.mir
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9180d5fc8bcf0..24aa31a318df3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8319,26 +8319,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
const TargetRegisterClass *SrcRC = RI.getRegClassForReg(MRI, NewDstReg);
if (const TargetRegisterClass *CommonRC =
RI.getCommonSubClass(NewDstRC, SrcRC)) {
- // Also intersect with VGPR-compatible operand register class
- // constraints from user instructions. This preserves restricted
- // register classes (e.g., VGPR_32_Lo256 for WMMA scale operands) that
- // would otherwise be lost when an SGPR is replaced with a VGPR.
- // Constraints incompatible with VGPRs (e.g., SALU instructions
- // requiring SReg_32) are skipped because those users will be converted
- // to VALU by the worklist.
- for (const MachineOperand &UseMO : MRI.use_operands(DstReg)) {
- const MachineInstr *UseMI = UseMO.getParent();
- if (UseMI == &Inst)
- continue;
- unsigned OpIdx = UseMI->getOperandNo(&UseMO);
- if (const TargetRegisterClass *OpRC =
- getRegClass(UseMI->getDesc(), OpIdx)) {
- if (const TargetRegisterClass *Narrowed =
- RI.getCommonSubClass(CommonRC, OpRC))
- CommonRC = Narrowed;
- }
- }
-
// Instead of creating a copy where src and dst are the same register
// class, we just replace all uses of dst with src. These kinds of
// copies interfere with the heuristics MachineSink uses to decide
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wmma-scale-lo256.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wmma-scale-lo256.mir
deleted file mode 100644
index 4cead3056d808..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wmma-scale-lo256.mir
+++ /dev/null
@@ -1,65 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck %s
-
-# Test that si-fix-sgpr-copies preserves the Lo256 register class constraint
-# when eliminating a VGPR-to-SGPR copy used as a WMMA scale operand.
-#
-# The scale_src0 and scale_src1 operands of V_WMMA_SCALE instructions require
-# registers from VCSrc_b32_Lo256 (VS_32_Lo256), which only allows VGPRs 0-255.
-# When si-fix-sgpr-copies eliminates a VGPR-to-SGPR copy by replacing uses of
-# the SGPR with the VGPR source, it must constrain the VGPR to vgpr_32_lo256
-# to preserve this hardware encoding requirement.
-
----
-name: wmma_scale_copy_vgpr_to_sgpr
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $sgpr0
- ; CHECK-LABEL: name: wmma_scale_copy_vgpr_to_sgpr
- ; CHECK: liveins: $vgpr0, $sgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32_lo256 = COPY $vgpr0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
- ; CHECK-NEXT: early-clobber %6:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF]], [[DEF1]], 0, [[DEF2]], [[COPY]], [[DEF3]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0
- %0:vgpr_32 = COPY $vgpr0
- %1:sreg_32 = COPY %0
- %2:vreg_512_align2 = IMPLICIT_DEF
- %3:vreg_512_align2 = IMPLICIT_DEF
- %4:vreg_256_align2 = IMPLICIT_DEF
- %5:vgpr_32_lo256 = IMPLICIT_DEF
- %6:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr %2, %3, 0, %4, %1, %5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- S_ENDPGM 0
-...
-
-# Also test scale_src1 (operand 6) constraint is preserved.
-
----
-name: wmma_scale_copy_vgpr_to_sgpr_src1
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $sgpr0
- ; CHECK-LABEL: name: wmma_scale_copy_vgpr_to_sgpr_src1
- ; CHECK: liveins: $vgpr0, $sgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32_lo256 = COPY $vgpr0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
- ; CHECK-NEXT: early-clobber %6:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF]], [[DEF1]], 0, [[DEF2]], [[DEF3]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0
- %0:vgpr_32 = COPY $vgpr0
- %1:sreg_32 = COPY %0
- %2:vreg_512_align2 = IMPLICIT_DEF
- %3:vreg_512_align2 = IMPLICIT_DEF
- %4:vreg_256_align2 = IMPLICIT_DEF
- %5:vgpr_32_lo256 = IMPLICIT_DEF
- %6:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr %2, %3, 0, %4, %5, %1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- S_ENDPGM 0
-...
More information about the llvm-branch-commits mailing list