[llvm] [AMDGPU] Fold multiple aligned v_mov_b32 to v_mov_b64 on gfx942 (PR #138843)
Chris Jackson via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 4 08:53:53 PDT 2025
================
@@ -2331,6 +2332,113 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
return true;
}
+// gfx942+ can use V_MOV_B64 for materializing constant immediates.
+// For example:
+// %0:vgpr_32 = V_MOV_B32 0, implicit $exec
+// %1:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1
+// ->
+// %1:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
+bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
+ assert(MI.isRegSequence());
+ auto Reg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+ const MCInstrDesc &MovDesc = TII->get(AMDGPU::V_MOV_B64_PSEUDO);
+ const TargetRegisterClass *RC =
+ TII->getRegClass(MovDesc, 0, TRI, *MI.getMF());
+
+ if (!ST->hasMovB64() || !TRI->isVGPR(*MRI, Reg) ||
+ !MRI->hasOneNonDBGUse(Reg) ||
+ (!TRI->getCompatibleSubRegClass(DefRC, RC, AMDGPU::sub0_sub1) &&
+ DefRC != RC))
+ return false;
+
+ SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
+ if (!getRegSeqInit(Defs, Reg))
+ return false;
+
+ // Only attempting to fold immediate materializations.
----------------
chrisjbris wrote:
```suggestion
// Only attempt to fold immediate materializations.
```
nit
https://github.com/llvm/llvm-project/pull/138843
More information about the llvm-commits mailing list