[llvm] [NVPTX] Vectorize and lower 256-bit global loads/stores for sm_100+/ptx88+ (PR #139292)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 10:24:30 PDT 2025
================
@@ -2438,17 +2438,27 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
+ if support_v8 then {
+ def _v8 : NVPTXInst<
+ (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
+ regclass:$dst5, regclass:$dst6, regclass:$dst7, regclass:$dst8),
+ (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, ADDR:$addr),
+ "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
+ "[$addr];", []>;
+ }
}
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
defm LDV_i16 : LD_VEC<Int16Regs>;
- defm LDV_i32 : LD_VEC<Int32Regs>;
+ defm LDV_i32 : LD_VEC<Int32Regs, true>;
----------------
AlexMaclean wrote:
Use `support_v8 = true` here for clarity.
https://github.com/llvm/llvm-project/pull/139292
More information about the llvm-commits
mailing list