[llvm] [AMDGPU][True16][CodeGen] fix moveToVALU with proper subreg access in true16 (PR #132089)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 13:26:46 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

<details>
<summary>Changes</summary>

When an SGPR copy is lowered to a VALU instruction, check whether the new VALU instruction is used by a true16 instruction. If so, add a lo16 subreg access where the operand sizes mismatch.

---
Full diff: https://github.com/llvm/llvm-project/pull/132089.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+16) 
- (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index fb791c8342282..5f2bd507d1767 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7835,6 +7835,22 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
     assert(NewDstRC);
     NewDstReg = MRI.createVirtualRegister(NewDstRC);
     MRI.replaceRegWith(DstReg, NewDstReg);
+
+    // Check useMI of NewInstr. If used by a true16 instruction,
+    // add a lo16 subreg access if size mismatched
+    if (ST.useRealTrue16Insts() && NewDstRC == &AMDGPU::VGPR_32RegClass) {
+      for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+                                             E = MRI.use_end();
+           I != E; ++I) {
+        MachineInstr &UseMI = *I->getParent();
+        unsigned UseMIOpcode = UseMI.getOpcode();
+        if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
+            (16 ==
+             RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
+          I->setSubReg(AMDGPU::lo16);
+        }
+      }
+    }
   }
   fixImplicitOperands(*NewInstr);
   // Legalize the operands
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index 5ea39997938ad..4f6b334ec0819 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -699,7 +699,7 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
 ; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, s0, v0.l
 ; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0/*Invalid register, operand has 'VS_16' register class*/, s0, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, s0, vcc_lo
 ; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-SAFE-TRUE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
 ; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo

``````````

</details>


https://github.com/llvm/llvm-project/pull/132089


More information about the llvm-commits mailing list