[llvm] [AMDGPU] Eliminate unnecessary packing in wider f16 vectors for sdwa/opsel-able instruction (PR #137137)
Vikash Gupta via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 22:58:20 PST 2025
================
@@ -1369,6 +1457,434 @@ bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) {
return SIPeepholeSDWA().run(MF);
}
+static bool isSrcDestFP16Bits(MachineInstr *MI, const SIInstrInfo *TII) {
+ static const DenseSet<unsigned> FP16BitOpcodes = {
+ AMDGPU::V_CVT_F16_U16_e32, AMDGPU::V_CVT_F16_U16_e64,
+ AMDGPU::V_CVT_F16_I16_e32, AMDGPU::V_CVT_F16_I16_e64,
+ AMDGPU::V_RCP_F16_e64, AMDGPU::V_RCP_F16_e32,
+ AMDGPU::V_RSQ_F16_e64, AMDGPU::V_RSQ_F16_e32,
+ AMDGPU::V_SQRT_F16_e64, AMDGPU::V_SQRT_F16_e32,
+ AMDGPU::V_LOG_F16_e64, AMDGPU::V_LOG_F16_e32,
+ AMDGPU::V_EXP_F16_e64, AMDGPU::V_EXP_F16_e32,
+ AMDGPU::V_SIN_F16_e64, AMDGPU::V_SIN_F16_e32,
+ AMDGPU::V_COS_F16_e64, AMDGPU::V_COS_F16_e32,
+ AMDGPU::V_FLOOR_F16_e64, AMDGPU::V_FLOOR_F16_e32,
+ AMDGPU::V_CEIL_F16_e64, AMDGPU::V_CEIL_F16_e32,
+ AMDGPU::V_TRUNC_F16_e64, AMDGPU::V_TRUNC_F16_e32,
+ AMDGPU::V_RNDNE_F16_e64, AMDGPU::V_RNDNE_F16_e32,
+ AMDGPU::V_FRACT_F16_e64, AMDGPU::V_FRACT_F16_e32,
+ AMDGPU::V_FREXP_MANT_F16_e64, AMDGPU::V_FREXP_MANT_F16_e32,
+ AMDGPU::V_FREXP_EXP_I16_F16_e64, AMDGPU::V_FREXP_EXP_I16_F16_e32,
+ AMDGPU::V_LDEXP_F16_e64, AMDGPU::V_LDEXP_F16_e32,
+ AMDGPU::V_ADD_F16_e64, AMDGPU::V_ADD_F16_e32,
+ AMDGPU::V_SUB_F16_e64, AMDGPU::V_SUB_F16_e32,
+ AMDGPU::V_SUBREV_F16_e64, AMDGPU::V_SUBREV_F16_e32,
+ AMDGPU::V_MUL_F16_e64, AMDGPU::V_MUL_F16_e32,
+ AMDGPU::V_MAX_F16_e64, AMDGPU::V_MAX_F16_e32,
+ AMDGPU::V_MIN_F16_e64, AMDGPU::V_MIN_F16_e32,
+ AMDGPU::V_MAD_F16_e64, AMDGPU::V_FMA_F16_e64,
+ AMDGPU::V_DIV_FIXUP_F16_e64};
+
+ unsigned Opcode = MI->getOpcode();
+ if (TII->isSDWA(Opcode))
+ Opcode = AMDGPU::getBasicFromSDWAOp(Opcode);
+
+ return FP16BitOpcodes.contains(Opcode);
----------------
vg0204 wrote:
> Broader question: are there other opcodes that use OPSEL or SDWA that are eligible for this optimization? (I'd suspect there are some BF16 ones, say)
Since you mentioned it, I used Cursor to generate a list of such opcodes, which I have attached here:
[AMDGPU_16bit_Opcodes.md](https://github.com/user-attachments/files/24048736/AMDGPU_16bit_Opcodes.md). The list seems quite apt to me for use here!
https://github.com/llvm/llvm-project/pull/137137
More information about the llvm-commits
mailing list