[llvm] AMDGPU/GlobalISel: Add regbanklegalize rules for load and store (PR #153176)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 05:39:34 PDT 2025
================
@@ -654,54 +657,187 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
return (*MI.memoperands_begin())->getFlags() & MONoClobber;
});
- Predicate isNaturalAlignedSmall([](const MachineInstr &MI) -> bool {
+ Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return MMO->getAlign() >= Align(MMO->getSize().getValue());
+ });
+
+ Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
const MachineMemOperand *MMO = *MI.memoperands_begin();
const unsigned MemSize = 8 * MMO->getSize().getValue();
- return (MemSize == 16 && MMO->getAlign() >= Align(2)) ||
- (MemSize == 8 && MMO->getAlign() >= Align(1));
+ return MemSize == 16 || MemSize == 8;
+ });
+
+ Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return 8 * MMO->getSize().getValue() == 32;
});
auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
(isConst || isInvMMO || isNoClobberMMO);
// clang-format off
+ // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
addRulesForGOpcs({G_LOAD})
- .Any({{DivB32, DivP0}, {{VgprB32}, {VgprP0}}})
- .Any({{DivB32, UniP0}, {{VgprB32}, {VgprP0}}})
-
- .Any({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
- .Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
- .Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
- .Any({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
- .Any({{{UniB64, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
- .Any({{{UniB96, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
- .Any({{{UniB128, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
- .Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
- .Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})
-
- .Any({{DivB32, UniP3}, {{VgprB32}, {VgprP3}}})
- .Any({{{UniB32, UniP3}, isAlign4 && isUL}, {{SgprB32}, {SgprP3}}})
- .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}})
-
- .Any({{{DivB256, DivP4}}, {{VgprB256}, {VgprP4}, SplitLoad}})
- .Any({{{UniB32, UniP4}, isNaturalAlignedSmall && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // i8 and i16 load
- .Any({{{UniB32, UniP4}, isAlign4 && isUL}, {{SgprB32}, {SgprP4}}})
- .Any({{{UniB96, UniP4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasUnalignedLoads)
- .Any({{{UniB96, UniP4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasUnalignedLoads)
- .Any({{{UniB96, UniP4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasUnalignedLoads)
- .Any({{{UniB128, UniP4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
- .Any({{{UniB256, UniP4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
- .Any({{{UniB512, UniP4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})
- .Any({{{UniB32, UniP4}, !isNaturalAlignedSmall || !isUL}, {{UniInVgprB32}, {VgprP4}}}, hasSMRDSmall) // i8 and i16 load
- .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}})
- .Any({{{UniB256, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP4}, SplitLoad}})
- .Any({{{UniB512, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP4}, SplitLoad}})
-
- .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}});
-
- addRulesForGOpcs({G_ZEXTLOAD}) // i8 and i16 zero-extending loads
- .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}})
- .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}});
+ // flat, addrspace(0), never uniform - flat_load
+ .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
+ .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
+ .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
+ .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
+ .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})
+
+ // global, addrspace(1)
+ // divergent - global_load
+ .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
+ .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
+ .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
+ .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
+ .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
+ .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
+ .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})
+
+ // uniform - s_load
+ .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
+ .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
+ .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
+ // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
+ .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
+ .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
+ .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
+ .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
+ .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
+ .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
+ .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
+ .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
+ .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
+
+ // Uniform loads that go via a global or buffer load, for example volatile
+ // or unaligned uniform loads. Not using the standard {{UniInVgprTy},
+ // {VgprP1}} mapping since it would be selected as global_load; use SgprP1
+ // for the pointer instead to match patterns without flat-for-global, the
+ // default for GFX7 and older.
+ // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
+ // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
+ .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
+ .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
+ .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
+ .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
+ .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
+ .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
+ .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
+ .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
+ .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})
+
+ // local, addrspace(3) - ds_load
+ .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
+ .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
+ .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
+ .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
+ .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})
+
+ .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
+ .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
+ .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
+ .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
+ .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})
+
+ // constant, addrspace(4)
+ // divergent - global_load
+ .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
+ .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
+ .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
+ .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
+ .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
+ .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
+ .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})
+
+ // uniform - s_load
+ .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
+ .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
+ .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
+ .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
+ .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
----------------
Pierre-vh wrote:
We should really TableGen all of this eventually. When this whole thing is close to being enabled by default and I have some downtime, I'll look into it.
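For reference, the rules quoted above are built with the existing C++ predicate/rule builder in this file. A minimal sketch of that pattern, assuming only the combinators visible in the patch (the isAlign8 predicate and the exact rule below are hypothetical, purely for illustration):

    // A Predicate wraps a callable that inspects the instruction's memory
    // operand and can be combined with &&, || and ! inside a rule.
    Predicate isAlign8([](const MachineInstr &MI) -> bool {
      return (*MI.memoperands_begin())->getAlign() >= Align(8);
    });

    // Attach a register-bank mapping to a (type, address space, divergence)
    // case that satisfies the predicate.
    addRulesForGOpcs({G_LOAD})
        .Any({{{UniB64, P1}, isAlign8 && isUL}, {{SgprB64}, {SgprP1}}});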
https://github.com/llvm/llvm-project/pull/153176