[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
Mircea Trofin via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Sep 17 07:23:24 PDT 2025
================
@@ -261,55 +262,106 @@ void FlowAwareEmbedder::computeEmbeddings(const BasicBlock &BB) const {
BBVecMap[&BB] = BBVector;
}
+// ==----------------------------------------------------------------------===//
+// VocabStorage
+//===----------------------------------------------------------------------===//
+
+VocabStorage::VocabStorage(std::vector<std::vector<Embedding>> &&SectionData)
+ : Sections(std::move(SectionData)), TotalSize([&] {
+ assert(!Sections.empty() && "Vocabulary has no sections");
+ assert(!Sections[0].empty() && "First section of vocabulary is empty");
+ // Compute total size across all sections
+ size_t Size = 0;
+ for (const auto &Section : Sections)
+ Size += Section.size();
+ return Size;
+ }()),
+ Dimension([&] {
+ // Get dimension from the first embedding in the first section - all
+ // embeddings must have the same dimension
+ assert(!Sections.empty() && "Vocabulary has no sections");
+ assert(!Sections[0].empty() && "First section of vocabulary is empty");
+ return static_cast<unsigned>(Sections[0][0].size());
+ }()) {}
+
+const Embedding &VocabStorage::const_iterator::operator*() const {
+ assert(SectionId < Storage->Sections.size() && "Invalid section ID");
+ assert(LocalIndex < Storage->Sections[SectionId].size() &&
+ "Local index out of range");
+ return Storage->Sections[SectionId][LocalIndex];
+}
+
+VocabStorage::const_iterator &VocabStorage::const_iterator::operator++() {
+ ++LocalIndex;
+ // Check if we need to move to the next section
+ while (SectionId < Storage->getNumSections() &&
+ LocalIndex >= Storage->Sections[SectionId].size()) {
+ LocalIndex = 0;
+ ++SectionId;
+ }
+ return *this;
+}
+
+bool VocabStorage::const_iterator::operator==(
+ const const_iterator &Other) const {
+ return Storage == Other.Storage && SectionId == Other.SectionId &&
+ LocalIndex == Other.LocalIndex;
+}
+
+bool VocabStorage::const_iterator::operator!=(
+ const const_iterator &Other) const {
+ return !(*this == Other);
+}
+
// ==----------------------------------------------------------------------===//
// Vocabulary
//===----------------------------------------------------------------------===//
unsigned Vocabulary::getDimension() const {
----------------
mtrofin wrote:
you can move trivial accessors to the header. Better performance in non- [Thin]LTO cases.
https://github.com/llvm/llvm-project/pull/158376
More information about the llvm-branch-commits
mailing list