[llvm] e5e3dcc - [mlgo] Add in-development instruction based features for regalloc advisor
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 17 12:55:11 PDT 2022
Author: Aiden Grossman
Date: 2022-09-17T19:54:45Z
New Revision: e5e3dccd0741c2cf6e1885f0b6053fcfc6684102
URL: https://github.com/llvm/llvm-project/commit/e5e3dccd0741c2cf6e1885f0b6053fcfc6684102
DIFF: https://github.com/llvm/llvm-project/commit/e5e3dccd0741c2cf6e1885f0b6053fcfc6684102.diff
LOG: [mlgo] Add in-development instruction based features for regalloc advisor
This patch adds in instruction based features to the regalloc advisor
gated behind a flag so a user can decide at runtime whether or not they
want to enable the feature. The features are only enabled when LLVM is
compiled in MLGO development mode (i.e., LLVM_HAVE_TF_API is set to true).
To extract the instruction features, I'm taking a list of segments from
each LiveInterval and noting the start and end SlotIndices. This list is then
sorted based on the start SlotIndex and I iterate through each SlotIndex
to grab instructions, making sure to check for overlaps. This results in
a vector of opcodes and a binary mapping matrix that maps live ranges to the
opcodes of the instructions within that LR.
Reviewed By: mtrofin
Differential Revision: https://reviews.llvm.org/D131930
Added:
llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll
llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp
Modified:
llvm/include/llvm/CodeGen/SlotIndexes.h
llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
llvm/unittests/CodeGen/CMakeLists.txt
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h
index 5df6520f7cf6c..403a94c53dc43 100644
--- a/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -108,9 +108,6 @@ class raw_ostream;
PointerIntPair<IndexListEntry*, 2, unsigned> lie;
- SlotIndex(IndexListEntry *entry, unsigned slot)
- : lie(entry, slot) {}
-
IndexListEntry* listEntry() const {
assert(isValid() && "Attempt to compare reserved index.");
#ifdef EXPENSIVE_CHECKS
@@ -139,6 +136,11 @@ class raw_ostream;
/// Construct an invalid index.
SlotIndex() = default;
+ // Creates a SlotIndex from an IndexListEntry and a slot. Generally should
+ // not be used. This method is only public to facilitate writing certain
+ // unit tests.
+ SlotIndex(IndexListEntry *entry, unsigned slot) : lie(entry, slot) {}
+
// Construct a new slot index from the given one, and set the slot.
SlotIndex(const SlotIndex &li, Slot s) : lie(li.listEntry(), unsigned(s)) {
assert(lie.getPointer() != nullptr &&
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 17f9e3ede639a..0345efaf72c6b 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#endif
+#include "MLRegallocEvictAdvisor.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
@@ -64,6 +65,13 @@ static cl::opt<std::string> ModelUnderTraining(
"regalloc-model", cl::Hidden,
cl::desc("The model being trained for register allocation eviction"));
+static cl::opt<bool> EnableDevelopmentFeatures(
+ "regalloc-enable-development-features", cl::Hidden,
+ cl::desc("Whether or not to enable features under development for the ML "
+ "regalloc advisor"));
+
+#else
+static const bool EnableDevelopmentFeatures = false;
#endif // #ifdef LLVM_HAVE_TF_API
extern cl::opt<unsigned> EvictInterferenceCutoff;
@@ -110,20 +118,9 @@ INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
// Common ML Advisor declarations
// ===================================
namespace {
-// This is the maximum number of interfererring ranges. That's the number of
-// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
-// For X86, that's 32.
-// TODO: find a way to get this, statically, in a programmatic way.
-static const int64_t MaxInterferences = 32;
-
-// Logically, we can think of the feature set given to the evaluator as a 2D
-// matrix. The rows are the features (see next). The columns correspond to the
-// interferences. We treat the candidate virt reg as an 'interference', too, as
-// its feature set is the same as that of the interferring ranges. So we'll have
-// MaxInterferences + 1 columns and by convention, we will use the last column
-// for the virt reg seeking allocation.
-static const int64_t CandidateVirtRegPos = MaxInterferences;
-static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;
+// The model can only accept a specified number of opcodes and will error it if
+// fed an opcode it hasn't seen before. This constant sets the current cutoff.
+static const int OpcodeValueCutoff = 17716;
// Most features are as described above, so we'll reuse this vector in defining
// them.
@@ -193,6 +190,19 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
"lowest stage of an interval in this LR") \
M(float, progress, {1}, "ratio of current queue size to initial size")
+#ifdef LLVM_HAVE_TF_API
+#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) \
+ M(int64_t, instructions, InstructionsShape, \
+ "Opcodes of the instructions covered by the eviction problem")
+
+#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) \
+ M(int64_t, instructions_mapping, InstructionsMappingShape, \
+ "A binary matrix mapping LRs to instruction opcodes")
+#else
+#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
+#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
+#endif
+
// The model learns to pick one of the mask == 1 interferences. This is the
// name of the output tensor. The contract with the model is that the output
// will be guaranteed to be to a mask == 1 position. Using a macro here to
@@ -201,10 +211,17 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
// Named features index.
enum FeatureIDs {
-#define _FEATURE_IDX(_, name, __, ___) name,
- RA_EVICT_FEATURES_LIST(_FEATURE_IDX)
+#define _FEATURE_IDX_SIMPLE(_, name, __, ___) name
+#define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D),
+ RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount,
+#ifdef LLVM_HAVE_TF_API
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount,
+#else
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX)
+#endif // #ifdef LLVM_HAVE_TF_API
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount
#undef _FEATURE_IDX
- FeatureCount
+#undef _FEATURE_IDX_SIMPLE
};
// The ML advisor will typically have a sparse input to the evaluator, because
@@ -223,7 +240,11 @@ void resetInputs(MLModelRunner &Runner) {
std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \
getTotalSize<TYPE>(SHAPE));
RA_EVICT_FEATURES_LIST(_RESET)
+ if (EnableDevelopmentFeatures) {
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET)
#undef _RESET
+ }
}
// Per-live interval components that get aggregated into the feature values
@@ -272,11 +293,11 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor {
/// Load the features of the given VirtReg (allocated or not) at column Pos,
/// but if that can't be evicted, return false instead.
- bool loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
- bool IsHint,
- const SmallVirtRegSet &FixedRegisters,
- llvm::SmallVectorImpl<float> &Largest,
- size_t Pos) const;
+ bool
+ loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
+ bool IsHint, const SmallVirtRegSet &FixedRegisters,
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
private:
static float getInitialQueueSize(const MachineFunction &MF);
@@ -288,8 +309,8 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor {
void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
llvm::SmallVectorImpl<float> &Largest, size_t Pos,
- int64_t IsHint, int64_t LocalIntfsCount,
- float NrUrgent) const;
+ int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
// Point-in-time: we didn't learn this, so we always delegate to the
// default.
@@ -332,7 +353,13 @@ class ReleaseModeEvictionAdvisorAnalysis final
public:
ReleaseModeEvictionAdvisorAnalysis()
: RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {
- InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+ if (EnableDevelopmentFeatures) {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(
+ _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
+ } else {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+ }
}
// support for isa<> and dyn_cast.
static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
@@ -399,12 +426,25 @@ class DevelopmentModeEvictionAdvisorAnalysis final
public:
DevelopmentModeEvictionAdvisorAnalysis()
: RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {
- InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
- TrainingInputFeatures = {
- RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
- TensorSpec::createSpec<float>("action_discount", {1}),
- TensorSpec::createSpec<int32_t>("action_step_type", {1}),
- TensorSpec::createSpec<float>("action_reward", {1})};
+ if (EnableDevelopmentFeatures) {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(
+ _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
+ TrainingInputFeatures = {
+ RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})};
+ } else {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+ TrainingInputFeatures = {
+ RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})};
+ }
}
// support for isa<> and dyn_cast.
static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
@@ -535,7 +575,8 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
bool MLEvictAdvisor::loadInterferenceFeatures(
const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
const SmallVirtRegSet &FixedRegisters,
- llvm::SmallVectorImpl<float> &Largest, size_t Pos) const {
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+ llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
// It is only possible to evict virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
// leave unavailable
@@ -594,7 +635,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
// OK, so if we made it this far, this LR is an eviction candidate, load its
// features.
extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
- NrUrgent);
+ NrUrgent, LRPosInfo);
return true;
}
@@ -638,6 +679,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
// reset all the features to 0) Use Pos to capture the column we load
// features at - in AllocationOrder order.
size_t Pos = 0;
+ SmallVector<LRStartEndInfo, NumberOfInterferences> LRPosInfo;
for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
++I, ++Pos) {
MCRegister PhysReg = *I;
@@ -647,7 +689,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
continue;
}
if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
- Largest, Pos)) {
+ Largest, Pos, LRPosInfo)) {
++Available;
Regs[Pos] = std::make_pair(PhysReg, true);
}
@@ -665,9 +707,25 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
CandidateVirtRegPos, /*IsHint*/ 0,
/*LocalIntfsCount*/ 0,
- /*NrUrgent*/ 0.0);
+ /*NrUrgent*/ 0.0, LRPosInfo);
assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
"nothing to allocate initially.");
+#ifdef LLVM_HAVE_TF_API
+ if (EnableDevelopmentFeatures) {
+ extractInstructionFeatures(
+ LRPosInfo, Runner,
+ [this](SlotIndex InputIndex) -> int {
+ auto *CurrentMachineInstruction =
+ LIS->getInstructionFromIndex(InputIndex);
+ if (!CurrentMachineInstruction) {
+ return -1;
+ }
+ return CurrentMachineInstruction->getOpcode();
+ },
+ FeatureIDs::instructions, FeatureIDs::instructions_mapping,
+ LIS->getSlotIndexes()->getLastIndex());
+ }
+#endif // #ifdef LLVM_HAVE_TF_API
// Normalize the features.
for (auto &V : Largest)
V = V ? V : 1.0;
@@ -752,7 +810,8 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
void MLEvictAdvisor::extractFeatures(
const SmallVectorImpl<const LiveInterval *> &Intervals,
llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint,
- int64_t LocalIntfsCount, float NrUrgent) const {
+ int64_t LocalIntfsCount, float NrUrgent,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
int64_t NrDefsAndUses = 0;
int64_t NrBrokenHints = 0;
double R = 0.0;
@@ -799,6 +858,13 @@ void MLEvictAdvisor::extractFeatures(
HintWeights += LIFC.HintWeights;
NrRematerializable += LIFC.IsRemat;
+
+ if (EnableDevelopmentFeatures) {
+ for (auto CurrentSegment : LI) {
+ LRPosInfo.push_back(
+ LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos});
+ }
+ }
}
size_t Size = 0;
if (!Intervals.empty()) {
@@ -841,8 +907,110 @@ void MLEvictAdvisor::extractFeatures(
#undef SET
}
+void extractInstructionFeatures(SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
+ MLModelRunner *RegallocRunner,
+ function_ref<int(SlotIndex)> GetOpcode,
+ const int InstructionsIndex,
+ const int InstructionsMappingIndex,
+ const SlotIndex LastIndex) {
+ // This function extracts instruction based features relevant to the eviction
+ // problem currently being solved. This function ends up extracting two
+ // tensors.
+ // 1 - A vector of size max instruction count. It contains the opcodes of the
+ // instructions spanned by all the intervals in the current instance of the
+ // eviction problem.
+ // 2 - A binary mapping matrix of size (LR count * max
+ // instruction count) which maps where the LRs are live to the actual opcodes
+ // for which they are live.
+
+ // Start off by sorting the segments based on the beginning slot index.
+ std::sort(
+ LRPosInfo.begin(), LRPosInfo.end(),
+ [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; });
+ size_t InstructionIndex = 0;
+ size_t CurrentSegmentIndex = 0;
+ SlotIndex CurrentIndex = LRPosInfo[0].Begin;
+ // This loop processes all the segments sequentially by starting at the
+ // beginning slot index of the first segment, iterating through all the slot
+ // indices before the end slot index of that segment (while checking for
+ // overlaps with segments that start at greater slot indices). After hitting
+ // that end index, the current segment being processed gets bumped until they
+ // are all processed or the max instruction count is hit, where everything is
+ // just truncated.
+ while (true) {
+ // If the index that we are currently at is within the current segment and
+ // we haven't hit the max instruction count, continue processing the current
+ // segment.
+ while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End &&
+ InstructionIndex < ModelMaxSupportedInstructionCount) {
+ int CurrentOpcode = GetOpcode(CurrentIndex);
+ // If the current machine instruction is null, skip it
+ if (CurrentOpcode == -1) {
+ // If we're currently at the last index in the SlotIndex analysis,
+ // we can't go any further, so return from the function
+ if (CurrentIndex >= LastIndex) {
+ return;
+ }
+ CurrentIndex = CurrentIndex.getNextIndex();
+ continue;
+ }
+ // Current code assumes we're not going to get any disjointed segments
+ assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
+ RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
+ CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0;
+ // set value in the binary mapping matrix for the current instruction
+ auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos;
+ RegallocRunner->getTensor<int64_t>(
+ InstructionsMappingIndex)[CurrentSegmentPosition *
+ ModelMaxSupportedInstructionCount +
+ InstructionIndex] = 1;
+ // All of the segments are sorted based on the beginning slot index, but
+ // this doesn't mean that the beginning slot index of the next segment is
+ // after the end segment of the one being currently processed. This while
+ // loop checks for overlapping segments and modifies the portion of the
+ // column in the mapping matrix for the currently processed instruction
+ // for the LR it is checking. Also make sure that the beginning of the
+ // current segment we're checking for overlap in is less than the current
+ // index, otherwise we're done checking overlaps.
+ size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1;
+ while (OverlapCheckCurrentSegment < LRPosInfo.size() &&
+ LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) {
+ auto OverlapCurrentSegmentPosition =
+ LRPosInfo[OverlapCheckCurrentSegment].Pos;
+ if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) {
+ RegallocRunner->getTensor<int64_t>(
+ InstructionsMappingIndex)[OverlapCurrentSegmentPosition *
+ ModelMaxSupportedInstructionCount +
+ InstructionIndex] = 1;
+ }
+ ++OverlapCheckCurrentSegment;
+ }
+ ++InstructionIndex;
+ if (CurrentIndex >= LastIndex) {
+ return;
+ }
+ CurrentIndex = CurrentIndex.getNextIndex();
+ }
+ // if we've just finished processing through the last segment or if we've
+ // hit the maximum number of instructions, break out of the loop.
+ if (CurrentSegmentIndex == LRPosInfo.size() - 1 ||
+ InstructionIndex >= ModelMaxSupportedInstructionCount) {
+ break;
+ }
+ // If the segments are not overlapping, we need to move to the beginning
+ // index of the next segment to avoid having instructions not attached to
+ // any register.
+ if (LRPosInfo[CurrentSegmentIndex + 1].Begin >
+ LRPosInfo[CurrentSegmentIndex].End) {
+ CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin;
+ }
+ ++CurrentSegmentIndex;
+ }
+}
+
// Development mode-specific implementations
#ifdef LLVM_HAVE_TF_API
+
RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
return new DevelopmentModeEvictionAdvisorAnalysis();
}
@@ -872,7 +1040,10 @@ int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
if (TrainingLog.empty())
return Ret;
size_t CurrentFeature = 0;
- for (; CurrentFeature < FeatureIDs::FeatureCount; ++CurrentFeature) {
+ size_t FeatureCount = EnableDevelopmentFeatures
+ ? FeatureIDs::FeaturesWithDevelopmentCount
+ : FeatureIDs::FeatureCount;
+ for (; CurrentFeature < FeatureCount; ++CurrentFeature) {
Log->logSpecifiedTensorValue(
CurrentFeature, reinterpret_cast<const char *>(
getRunner().getTensorUntyped(CurrentFeature)));
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
new file mode 100644
index 0000000000000..cf9107bf17955
--- /dev/null
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
@@ -0,0 +1,72 @@
+//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Function declarations of utilities related to feature extraction for unit
+// testing.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
+#define LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
+
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+using namespace llvm;
+
+// LRStartEndInfo contains the start and end of a specific live range as
+// slot indices as well as storing the index of the physical register it
+// is assigned to (or 1 above the phys reg count if its the candidate).
+// Used when extracting per-instruction features in the context of a
+// specific eviction problem.
+struct LRStartEndInfo {
+ SlotIndex Begin;
+ SlotIndex End;
+ size_t Pos = 0;
+};
+
+void extractInstructionFeatures(
+ llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
+ MLModelRunner *RegallocRunner, function_ref<int(SlotIndex)> GetOpcode,
+ const int InstructionsIndex, const int InstructionsMappingIndex,
+ const SlotIndex LastIndex);
+
+// This is the maximum number of interfererring ranges. That's the number of
+// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
+// For X86, that's 32.
+// TODO: find a way to get this, statically, in a programmatic way.
+static const int64_t MaxInterferences = 32;
+
+// Logically, we can think of the feature set given to the evaluator as a 2D
+// matrix. The rows are the features (see next). The columns correspond to the
+// interferences. We treat the candidate virt reg as an 'interference', too, as
+// its feature set is the same as that of the interferring ranges. So we'll have
+// MaxInterferences + 1 columns and by convention, we will use the last column
+// for the virt reg seeking allocation.
+static const int64_t CandidateVirtRegPos = MaxInterferences;
+static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;
+
+// The number of instructions that a specific live range might have is variable,
+// but we're passing in a single matrix of instructions and tensorflow saved
+// models only support a fixed input size, so we have to cap the number of
+// instructions that can be passed along. The specific value was derived from
+// experimentation such that the majority of eviction problems would be
+// completely covered.
+static const int ModelMaxSupportedInstructionCount = 300;
+
+// When extracting per-instruction features, the advisor will currently create
+// a vector of size ModelMaxSupportedInstructionCount to hold the opcodes of the
+// instructions relevant to the eviction problem, and a NumberOfInterferences *
+// ModelMaxSupportedInstructionCount matrix that maps LRs to the instructions
+// that they span.
+static const std::vector<int64_t> InstructionsShape{
+ 1, ModelMaxSupportedInstructionCount};
+static const std::vector<int64_t> InstructionsMappingShape{
+ 1, NumberOfInterferences, ModelMaxSupportedInstructionCount};
+
+#endif // LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll
new file mode 100644
index 0000000000000..9c97314b21cb9
--- /dev/null
+++ b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll
@@ -0,0 +1,52 @@
+; REQUIRES: have_tf_api
+; REQUIRES: x86_64-linux
+;
+; Check that we log the currently in development features correctly with both the default
+; case and with a learned policy.
+;
+; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \
+; RUN: -regalloc-training-log=%t1 -tfutils-text-log \
+; RUN: -regalloc-enable-development-features < %S/Inputs/input.ll
+; RUN: sed -i 's/ \+/ /g' %t1
+; RUN: sed -i 's/\\n key:/\n key:/g' %t1
+; RUN: sed -i 's/\\n feature/\n feature/g' %t1
+; RUN: sed -i 's/\\n/ /g' %t1
+; RUN: FileCheck --input-file %t1 %s
+
+; RUN: rm -rf %t && mkdir %t
+; RUN: %python %S/../../../lib/Analysis/models/gen-regalloc-eviction-test-model.py %t_savedmodel
+; RUN: %python %S/../../../lib/Analysis/models/saved-model-to-tflite.py %t_savedmodel %t
+; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \
+; RUN: -regalloc-training-log=%t2 -tfutils-text-log -regalloc-model=%t \
+; RUN: -regalloc-enable-development-features < %S/Inputs/input.ll
+; RUN: sed -i 's/ \+/ /g' %t2
+; RUN: sed -i 's/\\n key:/\n key:/g' %t2
+; RUN: sed -i 's/\\n feature/\n feature/g' %t2
+; RUN: sed -i 's/\\n/ /g' %t2
+; RUN: FileCheck --input-file %t2 %s
+
+; CHECK-NOT: nan
+; CHECK-LABEL: key: \"instructions\"
+; Check the first five opcodes in the first eviction problem
+; CHECK-NEXT: value: 19
+; CHECK-SAME: value: 19
+; CHECK-SAME: value: 3031
+; CHECK-SAME: value: 1245
+; CHECK-SAME: value: 1264
+; The first eviction problem is significantly less than 300 instructions. Check
+; that there is a zero value
+; CHECK-SAME: value: 0
+; Only the candidate virtreg and the 10th LR are included in this problem. Make
+; sure the other LRs have values of zero.
+; CHECK-LABEL: key: \"instructions_mapping\"
+; CHECK-COUNT-2700: value: 0
+; CHECK-SAME: value: 1
+; Indexing 300 back from where the candidate vr actual resides due to the fact
+; that not all the values between the 10th LR and the candidate are zero.
+; CHECK-COUNT-6600: value: 0
+; CHECK-SAME: value: 1
+; Ensure that we can still go through the mapping matrices for the rest of the
+; eviction problems to make sure we haven't hit the end of the matrix above.
+; There are a total of 23 eviction problems with this test.
+; CHECK-COUNT-22: int64_list
+; CHECK: key: \"is_free\"
diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt
index def69d1d0e1e8..35b0934005e8a 100644
--- a/llvm/unittests/CodeGen/CMakeLists.txt
+++ b/llvm/unittests/CodeGen/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_unittest(CodeGenTests
TypeTraitsTest.cpp
TargetOptionsTest.cpp
TestAsmPrinter.cpp
+ MLRegallocDevelopmentFeatures.cpp
)
add_subdirectory(GlobalISel)
diff --git a/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp b/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp
new file mode 100644
index 0000000000000..022cfab94c895
--- /dev/null
+++ b/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp
@@ -0,0 +1,209 @@
+//===- MLRegAllocDevelopmentFeatures.cpp - test dev MLRegalloc features ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../lib/CodeGen/MLRegallocEvictAdvisor.h"
+#include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CodeGen.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <vector>
+
+using testing::ContainerEq;
+using testing::Test;
+
+struct LRPosInfoIndexes {
+ size_t StartIndex;
+ size_t EndIndex;
+ size_t PhysReg;
+};
+
+class RegallocDevelopmentFeaturesTest : public ::Test {
+protected:
+ SmallVector<LRStartEndInfo>
+ setupOverlapProblem(const SmallVectorImpl<LRPosInfoIndexes> &Segments,
+ ilist<IndexListEntry> &IndexList) {
+ SmallVector<LRStartEndInfo> PositionsToReturn;
+ PositionsToReturn.reserve(Segments.size());
+ for (auto CurrentPosIndexInfo : Segments) {
+ LRStartEndInfo CurrentPosInfo = {};
+ CurrentPosInfo.Pos = CurrentPosIndexInfo.PhysReg;
+ PositionsToReturn.push_back(CurrentPosInfo);
+ }
+ size_t CurrentSegmentIndex = 0;
+ size_t CurrentIndex = 0;
+ while (CurrentSegmentIndex < Segments.size()) {
+ auto *CurrentLEMem = static_cast<IndexListEntry *>(
+ Allocator.Allocate(sizeof(IndexListEntry), alignof(IndexListEntry)));
+ auto *CurrentListEntry =
+ new (CurrentLEMem) IndexListEntry(nullptr, CurrentIndex);
+ IndexList.push_back(CurrentListEntry);
+ for (size_t CurrentPosInfoIndex = 0;
+ CurrentPosInfoIndex < Segments.size(); ++CurrentPosInfoIndex) {
+ if ((CurrentIndex / SlotIndex::InstrDist) ==
+ Segments[CurrentPosInfoIndex].StartIndex) {
+ PositionsToReturn[CurrentPosInfoIndex].Begin =
+ SlotIndex(CurrentListEntry, 0);
+ } else if ((CurrentIndex / SlotIndex::InstrDist) ==
+ Segments[CurrentPosInfoIndex].EndIndex) {
+ PositionsToReturn[CurrentPosInfoIndex].End =
+ SlotIndex(CurrentListEntry, 0);
+ ++CurrentSegmentIndex;
+ }
+ }
+ CurrentIndex += SlotIndex::InstrDist;
+ }
+ return PositionsToReturn;
+ }
+
+ NoInferenceModelRunner setupModelRunner() {
+ const std::vector<TensorSpec> Inputs{
+ TensorSpec::createSpec<int64_t>("instructions", InstructionsShape),
+ TensorSpec::createSpec<int64_t>("instructions_mapping",
+ InstructionsMappingShape)};
+ LLVMContext Ctx;
+ return NoInferenceModelRunner(Ctx, Inputs);
+ }
+
+ std::vector<int64_t>
+ getExpectedMappingMatrix(SmallVectorImpl<LRPosInfoIndexes> &OverlapSetup) {
+ std::vector<int64_t> ExpectedMappingMatrix(
+ NumberOfInterferences * ModelMaxSupportedInstructionCount, 0);
+ for (auto NewSegment : OverlapSetup) {
+ for (size_t CurrentIndex = NewSegment.StartIndex;
+ CurrentIndex <= NewSegment.EndIndex; ++CurrentIndex) {
+ ExpectedMappingMatrix[NewSegment.PhysReg *
+ ModelMaxSupportedInstructionCount +
+ CurrentIndex] = 1;
+ }
+ }
+ return ExpectedMappingMatrix;
+ }
+
+ void runOverlapTest(SmallVectorImpl<LRPosInfoIndexes> &OverlapSetup) {
+ ilist<IndexListEntry> IndexList;
+ auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList);
+ NoInferenceModelRunner ModelRunner = setupModelRunner();
+ size_t MaxIndex = 0;
+ for (size_t CurrentOverlap = 0; CurrentOverlap < OverlapSetup.size();
+ ++CurrentOverlap) {
+ if (OverlapSetup[CurrentOverlap].EndIndex >
+ OverlapSetup[MaxIndex].EndIndex) {
+ MaxIndex = CurrentOverlap;
+ }
+ }
+ SlotIndex LastIndex = OverlapProblem[MaxIndex].End;
+ extractInstructionFeatures(
+ OverlapProblem, &ModelRunner,
+ [](SlotIndex InputSlot) -> int { return 0; }, 0, 1, LastIndex);
+ std::vector<int64_t> MappingMatrix(
+ ModelRunner.getTensor<int64_t>(1),
+ ModelRunner.getTensor<int64_t>(1) +
+ NumberOfInterferences * ModelMaxSupportedInstructionCount);
+ ASSERT_THAT(MappingMatrix,
+ ContainerEq(getExpectedMappingMatrix(OverlapSetup)));
+ IndexList.clearAndLeakNodesUnsafely();
+ }
+
+ BumpPtrAllocator Allocator;
+};
+
+// meta tests to ensure that test setup works correctly
+
+TEST_F(RegallocDevelopmentFeaturesTest,
+ MetaOverlapInstructionDistancesAreCorrect) {
+ SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+ OverlapSetup.push_back({0, 5, 0});
+ OverlapSetup.push_back({5, 10, 0});
+ ilist<IndexListEntry> IndexList;
+ auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList);
+ ASSERT_EQ(OverlapProblem[0].End.distance(OverlapProblem[1].End),
+ 5 * SlotIndex::InstrDist);
+ ASSERT_EQ(OverlapProblem[0].End.distance(OverlapProblem[1].Begin), 0);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, MetaSlotIndicesAreValid) {
+ SmallVector<LRPosInfoIndexes, 1> OverlapSetup;
+ OverlapSetup.push_back({0, 10, 0});
+ ilist<IndexListEntry> IndexList;
+ auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList);
+ ASSERT_TRUE(OverlapProblem[0].Begin.isValid());
+ ASSERT_TRUE(OverlapProblem[0].End.isValid());
+}
+
+// Testing of feature extraction for per-instruction features
+
+TEST_F(RegallocDevelopmentFeaturesTest, InstructionOpcodesAreCorrect) {
+ SmallVector<LRPosInfoIndexes, 1> OverlapSetup;
+ OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 0});
+ ilist<IndexListEntry> IndexList;
+ auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList);
+ NoInferenceModelRunner ModelRunner = setupModelRunner();
+ SlotIndex LastIndex = OverlapProblem[0].End;
+ SlotIndex FirstIndex = OverlapProblem[0].Begin;
+ extractInstructionFeatures(
+ OverlapProblem, &ModelRunner,
+ [FirstIndex](SlotIndex InputSlot) -> int {
+ return FirstIndex.distance(InputSlot) / SlotIndex::InstrDist;
+ },
+ 0, 1, LastIndex);
+ for (size_t CurrentInstructionIndex = 0;
+ CurrentInstructionIndex < ModelMaxSupportedInstructionCount;
+ ++CurrentInstructionIndex) {
+ ASSERT_EQ(
+ (size_t)ModelRunner.getTensor<int64_t>(0)[CurrentInstructionIndex],
+ CurrentInstructionIndex);
+ }
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, FullOverlap) {
+ SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+ OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 0});
+ OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 1});
+ runOverlapTest(OverlapSetup);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, PartialOverlap) {
+ SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+ OverlapSetup.push_back({0, 20, 0});
+ OverlapSetup.push_back({15, 30, 1});
+ runOverlapTest(OverlapSetup);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, PartialOverlapOpposite) {
+ SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+ OverlapSetup.push_back({15, 30, 1});
+ OverlapSetup.push_back({0, 20, 0});
+ runOverlapTest(OverlapSetup);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, InternalOverlap) {
+ SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+ OverlapSetup.push_back({0, 30, 0});
+ OverlapSetup.push_back({10, 20, 1});
+ runOverlapTest(OverlapSetup);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, TripleInternalOverlap) {
+ SmallVector<LRPosInfoIndexes, 3> OverlapSetup;
+ OverlapSetup.push_back({0, 30, 0});
+ OverlapSetup.push_back({10, 25, 1});
+ OverlapSetup.push_back({15, 20, 2});
+ runOverlapTest(OverlapSetup);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, InternalMultiOverlap) {
+ SmallVector<LRPosInfoIndexes, 3> OverlapSetup;
+ OverlapSetup.push_back({0, 45, 0});
+ OverlapSetup.push_back({30, 40, 1});
+ OverlapSetup.push_back({35, 60, 2});
+ runOverlapTest(OverlapSetup);
+}
More information about the llvm-commits
mailing list