[llvm] [TableGen][GlobalISel] Add rule-wide type inference (PR #66377)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 6 22:10:15 PST 2023
================
@@ -1639,6 +1695,471 @@ class PrettyStackTraceEmit : public PrettyStackTraceEntry {
}
};
+//===- CombineRuleOperandTypeChecker --------------------------------------===//
+
+/// This is a wrapper around OperandTypeChecker specialized for Combiner Rules.
+/// On top of doing the same things as OperandTypeChecker, this also attempts to
+/// infer as many types as possible for temporary register defs & immediates in
+/// apply patterns.
+///
+/// The inference is trivial and leverages the MCOI OperandTypes encoded in
+/// CodeGenInstructions to infer types across patterns in a CombineRule. It's
+/// thus very limited and only supports CodeGenInstructions (but that's the main
+/// use case so it's fine).
+///
+/// We only try to infer untyped operands in apply patterns when they're temp
+/// reg defs, or immediates. Inference always outputs a `TypeOf<$x>` where $x is
+/// a named operand from a match pattern.
+class CombineRuleOperandTypeChecker : private OperandTypeChecker {
+public:
+ CombineRuleOperandTypeChecker(const Record &RuleDef,
+ const OperandTable &MatchOpTable)
+ : OperandTypeChecker(RuleDef.getLoc()), RuleDef(RuleDef),
+ MatchOpTable(MatchOpTable) {}
+
+ /// Records and checks a 'match' pattern.
+ bool processMatchPattern(InstructionPattern &P);
+
+ /// Records and checks an 'apply' pattern.
+ bool processApplyPattern(InstructionPattern &P);
+
+ /// Propagates types, then performs type inference and does a second round of
+ /// propagation in the apply patterns only if any types were inferred.
+ void propagateAndInferTypes();
+
+private:
+ /// TypeEquivalenceClasses are groups of operands of an instruction that share
+ /// a common type.
+ ///
+ /// e.g. [[a, b], [c, d]] means a and b have the same type, and c and
+ /// d have the same type too. b/c and a/d don't have to have the same type,
+ /// though.
+ ///
+ /// NOTE: We use a SetVector, not a Set. This is to guarantee a stable
+ /// iteration order which is important because:
+ /// - During inference, we iterate that set and pick the first suitable
+ /// candidate. Using a normal set could make inference inconsistent across
+ /// runs if the Set uses the StringRef ptr to cache values.
+ /// - We print this set if DebugInfer is set, and we don't want our tests to
+ /// fail randomly due to the Set's iteration order changing.
+ using TypeEquivalenceClasses = std::vector<SetVector<StringRef>>;
+
+ static std::string toString(const SetVector<StringRef> &EqClass) {
+ return "[" + join(EqClass, ", ") + "]";
+ }
+
+ /// \returns true for `OPERAND_GENERIC_` 0 through 5.
+ /// These are the MCOI types that can be registers. The other MCOI types are
+ /// either immediates, or fancier operands used only post-ISel, so we don't
+ /// care about them for combiners.
+ static bool canMCOIOperandTypeBeARegister(StringRef MCOIType) {
+ // Assume OPERAND_GENERIC_0 through 5 can be registers. The other MCOI
+ // OperandTypes are either never used in gMIR, or not relevant (e.g.
+ // OPERAND_GENERIC_IMM, which is definitely never a register).
+ return MCOIType.drop_back(1).ends_with("OPERAND_GENERIC_");
+ }
+
+ /// Finds the "MCOI::" operand types for each operand of \p CGP.
+ ///
+ /// This is a bit trickier than it looks because we need to handle variadic
+ /// in/outs.
+ ///
+ /// e.g. for
+ /// (G_BUILD_VECTOR $vec, $x, $y) ->
+ /// [MCOI::OPERAND_GENERIC_0, MCOI::OPERAND_GENERIC_1,
+ /// MCOI::OPERAND_GENERIC_1]
+ ///
+ /// For unknown types (which can happen in variadics where varargs types are
+ /// inconsistent), a unique name is given, e.g. "unknown_type_0".
+ static std::vector<std::string>
+ getMCOIOperandTypes(const CodeGenInstructionPattern &CGP);
+
+ /// Adds the TypeEquivalenceClasses for \p P in \p OutTECs.
+ void getInstEqClasses(const InstructionPattern &P,
+ TypeEquivalenceClasses &OutTECs) const;
+
+ /// Calculates the TypeEquivalenceClasses for each instruction, then merges
+ /// them into a common set of TypeEquivalenceClasses for the whole rule.
+ ///
+ /// This works by repeatedly merging intersecting type equivalence classes
+ /// until no more merging occurs.
+ ///
+ /// This essentially applies the "transitive" part of type inference. Let's
+ /// take the following equivalence classes:
+ /// inst0: [a, b], [c, d]
+ /// inst1: [b, c]
+ ///
+ /// If we see inst0 alone, we can't say that a and d have the same type -
+ /// they're not in the same equivalence class. However, if we just use logic,
+ /// we can say: "a == d because a == b, b == c and c == d".
+ ///
+ /// Merging condenses that information into a single big equivalence class
+ /// which can be looked at alone to make the same deduction.
+ /// rule: [a, b, c, d]
+ TypeEquivalenceClasses getRuleEqClasses() const;
+
+ /// Tries to infer the type of the \p ImmOpIdx -th operand of \p IP using \p
+ /// TECs.
+ ///
+ /// This is achieved by trying to find a named operand in \p IP that shares
+ /// the same type as \p ImmOpIdx, and using \ref inferNamedOperandType on that
+ /// operand instead.
+ ///
+ /// \returns the inferred type or an empty PatternType if inference didn't
+ /// succeed.
+ PatternType inferImmediateType(const InstructionPattern &IP,
+ unsigned ImmOpIdx,
+ const TypeEquivalenceClasses &TECs) const;
+
+ /// Looks inside \p TECs to infer \p OpName's type.
+ ///
+ /// \returns the inferred type or an empty PatternType if inference didn't
+ /// succeed.
+ PatternType inferNamedOperandType(const InstructionPattern &IP,
+ StringRef OpName,
+ const TypeEquivalenceClasses &TECs) const;
+
+ const Record &RuleDef;
+ SmallVector<InstructionPattern *, 8> MatchPats;
+ SmallVector<InstructionPattern *, 8> ApplyPats;
+
+ const OperandTable &MatchOpTable;
+};
+
+bool CombineRuleOperandTypeChecker::processMatchPattern(InstructionPattern &P) {
+ MatchPats.push_back(&P);
+ return check(P, /*CheckTypeOf*/ [](const auto &) {
+ // GITypeOf in 'match' is currently always rejected by the
+ // CombineRuleBuilder after inference is done.
+ return true;
+ });
+}
+
+bool CombineRuleOperandTypeChecker::processApplyPattern(InstructionPattern &P) {
+ ApplyPats.push_back(&P);
+ return check(P, /*CheckTypeOf*/ [&](const PatternType &Ty) {
+ // GITypeOf<"$x"> can only be used if "$x" is a matched operand.
+ const auto OpName = Ty.getTypeOfOpName();
+ if (MatchOpTable.lookup(OpName).Found)
+ return true;
+
+ PrintError(RuleDef.getLoc(), "'" + OpName + "' ('" + Ty.str() +
+ "') does not refer to a matched operand!");
+ return false;
+ });
+}
+
+void CombineRuleOperandTypeChecker::propagateAndInferTypes() {
+ /// First step here is to propagate types using the OperandTypeChecker. That
+ /// way we ensure all uses of a given register have consistent types.
+ propagateTypes();
+
+ /// Build the TypeEquivalenceClasses for the whole rule.
+ const TypeEquivalenceClasses TECs = getRuleEqClasses();
+
+ /// Look at the apply patterns and find operands that need to be
+ /// inferred. We then try to find an equivalence class that they're a part of
+ /// and select the best operand to use for the `GITypeOf` type. We prioritize
+ /// defs of matched instructions because those are guaranteed to be registers.
+ bool InferredAny = false;
+ for (auto *Pat : ApplyPats) {
+ for (unsigned K = 0; K < Pat->operands_size(); ++K) {
+ auto &Op = Pat->getOperand(K);
+
+ // We only want to take a look at untyped defs or immediates.
+ if ((!Op.isDef() && !Op.hasImmValue()) || Op.getType())
+ continue;
+
+ // Infer defs & named immediates.
+ if (Op.isDef() || Op.isNamedImmediate()) {
+ // Check it's not a redefinition of a matched operand.
+ // In such cases, inference is not necessary because we just copy
+ // operands and don't create temporary registers.
+ if (MatchOpTable.lookup(Op.getOperandName()).Found)
+ continue;
+
+ // Inference is needed here, so try to do it.
+ if (PatternType Ty =
+ inferNamedOperandType(*Pat, Op.getOperandName(), TECs)) {
+ if (DebugTypeInfer)
+ errs() << "INFER: " << Op.describe() << " -> " << Ty.str() << "\n";
----------------
arsenm wrote:
Single quotes. Could also directly have an operator<< for Ty to avoid the .str()
https://github.com/llvm/llvm-project/pull/66377
More information about the llvm-commits
mailing list