[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

Fri Jun 13 11:14:18 PDT 2025

================
@@ -259,32 +306,40 @@ Error IR2VecVocabAnalysis::readVocabulary() {
     return createFileError(VocabFile, BufOrError.getError());
 
   auto Content = BufOrError.get()->getBuffer();
-  json::Path::Root Path("");
+
   Expected<json::Value> ParsedVocabValue = json::parse(Content);
   if (!ParsedVocabValue)
     return ParsedVocabValue.takeError();
 
-  bool Res = json::fromJSON(*ParsedVocabValue, Vocabulary, Path);
-  if (!Res)
-    return createStringError(errc::illegal_byte_sequence,
-                             "Unable to parse the vocabulary");
+  ir2vec::Vocab OpcodeVocab, TypeVocab, ArgVocab;
+  unsigned OpcodeDim, TypeDim, ArgDim;
+  if (auto Err = parseVocabSection("Opcodes", *ParsedVocabValue, OpcodeVocab,
----------------
svkeerthy wrote:

Correct. Will put it in the doc.

https://github.com/llvm/llvm-project/pull/143986