[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

Fri Jun 13 07:51:22 PDT 2025

================
@@ -259,32 +306,40 @@ Error IR2VecVocabAnalysis::readVocabulary() {
     return createFileError(VocabFile, BufOrError.getError());
 
   auto Content = BufOrError.get()->getBuffer();
-  json::Path::Root Path("");
+
   Expected<json::Value> ParsedVocabValue = json::parse(Content);
   if (!ParsedVocabValue)
     return ParsedVocabValue.takeError();
 
-  bool Res = json::fromJSON(*ParsedVocabValue, Vocabulary, Path);
-  if (!Res)
-    return createStringError(errc::illegal_byte_sequence,
-                             "Unable to parse the vocabulary");
+  ir2vec::Vocab OpcodeVocab, TypeVocab, ArgVocab;
+  unsigned OpcodeDim, TypeDim, ArgDim;
----------------
mtrofin wrote:

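Please initialize these variables at their declaration (e.g. `unsigned OpcodeDim = 0, TypeDim = 0, ArgDim = 0;`) so they never hold indeterminate values.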

https://github.com/llvm/llvm-project/pull/143986