[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RBLegalize (PR #112864)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Oct 19 08:06:23 PDT 2024
================
@@ -0,0 +1,258 @@
+//===- AMDGPURBLegalizeRules -------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURBLEGALIZERULES_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPURBLEGALIZERULES_H
+
+#include "llvm/CodeGen/MachineUniformityAnalysis.h"
+
+namespace llvm {
+
+class GCNSubtarget;
+
+namespace AMDGPU {
+
+// IDs used to build predicate for RBSRule. Predicate can have one or more IDs
+// and each represents a check for 'uniform or divergent' + LLT or just LLT on
+// register operand.
+// Most often checking one operand is enough to decide which RegBankLLTMapping
+// to apply (see Fast Rules), IDs are useful when two or more operands need to
+// be checked.
+enum UniformityLLTOpPredicateID {
+ _,
+ // scalars
+ S1,
+ S16,
+ S32,
+ S64,
+
+ UniS1,
+ UniS16,
+ UniS32,
+ UniS64,
+
+ DivS1,
+ DivS32,
+ DivS64,
+
+ // pointers
+ P1,
+
+ DivP1,
+
+ // vectors
+ V2S16,
+ V2S32,
+ V3S32,
+ V4S32,
+};
+
+// How to apply register bank on register operand.
+// In most cases, this serves as a LLT and register bank assert.
+// Can change operands and insert copies, extends, truncs, and readfirstlanes.
+// Anything more complicated requires LoweringMethod.
+enum RegBankLLTMapingApplyID {
+ Invalid,
+ None,
+ IntrId,
+ Imm,
+ Vcc,
+
+ // sgpr scalars, pointers, vectors and B-types
+ Sgpr16,
+ Sgpr32,
+ Sgpr64,
+ SgprV4S32,
+
+ // vgpr scalars, pointers, vectors and B-types
+ Vgpr32,
+ Vgpr64,
+ VgprP1,
+ VgprV4S32,
+
+ // Dst only modifiers: read-any-lane and truncs
+ UniInVcc,
+ UniInVgprS32,
+ UniInVgprV4S32,
+
+ Sgpr32Trunc,
+
+ // Src only modifiers: waterfalls, extends
+ Sgpr32AExt,
+ Sgpr32AExtBoolInReg,
+ Sgpr32SExt,
+};
+
+// Instruction needs to be replaced with sequence of instructions. Lowering was
+// not done by legalizer since instructions is available in either SGPR or VGPR.
+// For example S64 AND is available on SGPR, for that reason S64 AND is legal in
+// context of Legalizer that only checks LLT. But S64 AND is not available on
+// VGPR. Lower it to two S32 VGPR ANDs.
+enum LoweringMethodID {
+ DoNotLower,
+ UniExtToSel,
+ VgprToVccCopy,
+ SplitTo32,
+ Ext32To64,
+ UniCstExt,
+};
+
+enum FastRulesTypes {
+ No,
+ Standard, // S16, S32, S64, V2S16
+ Vector, // S32, V2S32, V3S32, V4S32
+};
+
+struct RegBankLLTMapping {
+ SmallVector<RegBankLLTMapingApplyID, 2> DstOpMapping;
+ SmallVector<RegBankLLTMapingApplyID, 4> SrcOpMapping;
+ LoweringMethodID LoweringMethod;
+ RegBankLLTMapping(
+ std::initializer_list<RegBankLLTMapingApplyID> DstOpMappingList,
+ std::initializer_list<RegBankLLTMapingApplyID> SrcOpMappingList,
+ LoweringMethodID LoweringMethod = DoNotLower);
+};
+
+struct PredicateMapping {
+ SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes;
+ std::function<bool(const MachineInstr &)> TestFunc;
+ PredicateMapping(
+ std::initializer_list<UniformityLLTOpPredicateID> OpList,
+ std::function<bool(const MachineInstr &)> TestFunc = nullptr);
+
+ bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI,
+ const MachineRegisterInfo &MRI) const;
+};
+
+struct RBSRule {
+ PredicateMapping Predicate;
+ RegBankLLTMapping OperandMapping;
+};
+
+class SetOfRulesForOpcode {
+ // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one.
+ SmallVector<RBSRule, 4> Rules;
+
+ // "Fast Rules"
+ // Instead of testing each 'Rules[i].Predicate' we do direct access to
+ // RegBankLLTMapping using getFastPredicateSlot. For example if:
+ // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32
+ // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32
+ FastRulesTypes FastTypes = No;
+#define InvMapping RegBankLLTMapping({Invalid}, {Invalid})
+ RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
+ RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
+
+public:
+ SetOfRulesForOpcode();
+ SetOfRulesForOpcode(FastRulesTypes FastTypes);
+
+ const RegBankLLTMapping &
+ findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const MachineUniformityInfo &MUI) const;
+
+ void addRule(RBSRule Rule);
+
+ void addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
+ RegBankLLTMapping RuleApplyIDs);
+ void addFastRuleUniform(UniformityLLTOpPredicateID Ty,
+ RegBankLLTMapping RuleApplyIDs);
+
+private:
+ int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const;
+};
+
+// Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a
+// little more efficient.
+class RegBankLegalizeRules {
+ const GCNSubtarget *ST;
+ MachineRegisterInfo *MRI;
+ // Separate maps for G-opcodes and instrinsics since they are in differents
+ // enums. Multiple opcodes can share same set of rules.
+ // RulesAlias = map<Opcode, KeyOpcode>
+ // Rules = map<KeyOpcode, SetOfRulesForOpcode>
+ SmallDenseMap<unsigned, unsigned, 256> GRulesAlias;
+ SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules;
+ SmallDenseMap<unsigned, unsigned, 128> IRulesAlias;
+ SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules;
+ class RuleSetInitializer {
+ SetOfRulesForOpcode *RuleSet;
+
+ public:
+ // Used for clang-format line breaks and to force writing all rules for
+ // opcode in same place.
+ template <class AliasMap, class RulesMap>
+ RuleSetInitializer(std::initializer_list<unsigned> OpcList,
+ AliasMap &RulesAlias, RulesMap &Rules,
+ FastRulesTypes FastTypes = No) {
+ unsigned KeyOpcode = *OpcList.begin();
+ for (unsigned Opc : OpcList) {
+ auto [_, NewInput] = RulesAlias.try_emplace(Opc, KeyOpcode);
+ assert(NewInput && "Can't redefine existing Rules");
+ }
+
+ auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes);
+ assert(NewInput && "Can't redefine existing Rules");
+
+ RuleSet = &DenseMapIter->second;
+ }
+
+ RuleSetInitializer(const RuleSetInitializer &) = delete;
+ RuleSetInitializer &operator=(const RuleSetInitializer &) = delete;
+ RuleSetInitializer(RuleSetInitializer &&) = delete;
+ RuleSetInitializer &operator=(RuleSetInitializer &&) = delete;
+ ~RuleSetInitializer() = default;
+
+ RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty,
+ RegBankLLTMapping RuleApplyIDs,
+ bool STPred = true) {
+ if (STPred)
+ RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs);
+ return *this;
+ }
+
+ RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty,
+ RegBankLLTMapping RuleApplyIDs,
+ bool STPred = true) {
+ if (STPred)
+ RuleSet->addFastRuleUniform(Ty, RuleApplyIDs);
+ return *this;
+ }
+
+ RuleSetInitializer &Any(RBSRule Init, bool STPred = true) {
+ if (STPred)
+ RuleSet->addRule(Init);
+ return *this;
+ }
+ };
+
+ RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
+ FastRulesTypes FastTypes = No);
+
+ RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
+ FastRulesTypes FastTypes = No);
+
+public:
+ // Initialize rules for all opcodes.
+ RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI);
+
+ // In case we don't want to regenerate same rules, we can use already
+ // generated rules but need to refresh references to objects that are
+ // created for this run.
+ void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) {
+ ST = &_ST;
+ MRI = &_MRI;
+ };
+
+ const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const;
+};
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif
----------------
arsenm wrote:
Missing end of file newline
https://github.com/llvm/llvm-project/pull/112864
More information about the llvm-branch-commits
mailing list