[clang] [llvm] AMDGPU: Add back half and bfloat support for global_load_tr16 pats (PR #99540)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 18 11:23:33 PDT 2024
================
@@ -1590,10 +1590,14 @@ let OtherPredicates = [isGFX12Plus] in {
let WaveSizePredicate = isWave32 in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32, int_amdgcn_global_load_tr_b64, v2i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, v8i16>;
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, v8f16>;
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, v8bf16>;
}
let WaveSizePredicate = isWave64 in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64, int_amdgcn_global_load_tr_b64, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, v4i16>;
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, v4f16>;
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, v4bf16>;
----------------
arsenm wrote:
Can you just loop over all register types, as is now done for most regular loads/stores?
https://github.com/llvm/llvm-project/pull/99540
More information about the cfe-commits mailing list