[llvm] [AMDGPU][True16][CodeGen] fix moveToVALU with proper subreg access in true16 (PR #132089)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 13:26:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
When an SGPR copy is lowered to a VALU instruction, check whether the new VALU instruction's result is used by a true16 instruction. If the using operand expects a 16-bit register, add a lo16 subreg access to that use.
---
Full diff: https://github.com/llvm/llvm-project/pull/132089.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+16)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index fb791c8342282..5f2bd507d1767 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7835,6 +7835,22 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
assert(NewDstRC);
NewDstReg = MRI.createVirtualRegister(NewDstRC);
MRI.replaceRegWith(DstReg, NewDstReg);
+
+ // Check useMI of NewInstr. If used by a true16 instruction,
+ // add a lo16 subreg access if size mismatched
+ if (ST.useRealTrue16Insts() && NewDstRC == &AMDGPU::VGPR_32RegClass) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+ E = MRI.use_end();
+ I != E; ++I) {
+ MachineInstr &UseMI = *I->getParent();
+ unsigned UseMIOpcode = UseMI.getOpcode();
+ if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
+ (16 ==
+ RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
+ I->setSubReg(AMDGPU::lo16);
+ }
+ }
+ }
}
fixImplicitOperands(*NewInstr);
// Legalize the operands
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index 5ea39997938ad..4f6b334ec0819 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -699,7 +699,7 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0/*Invalid register, operand has 'VS_16' register class*/, s0, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
``````````
</details>
https://github.com/llvm/llvm-project/pull/132089
More information about the llvm-commits mailing list