[llvm] Makslevental/unison tblgen (PR #150084)

Maksim Levental via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 22 11:34:58 PDT 2025


https://github.com/makslevental created https://github.com/llvm/llvm-project/pull/150084

None

>From 29c754e72251e47143fb734db6c7666f3f64b727 Mon Sep 17 00:00:00 2001
From: max <maksim.levental at gmail.com>
Date: Wed, 2 Jul 2025 12:03:17 -0400
Subject: [PATCH 1/2] [llvm] add unison

---
 llvm/include/llvm/TableGen/Unison.h    | 153 +++++++++
 llvm/lib/TableGen/CMakeLists.txt       |   1 +
 llvm/lib/TableGen/Unison.cpp           | 453 +++++++++++++++++++++++++
 llvm/lib/TableGen/rununi.sh            |   5 +
 llvm/utils/TableGen/Basic/TableGen.cpp |   2 +
 5 files changed, 614 insertions(+)
 create mode 100644 llvm/include/llvm/TableGen/Unison.h
 create mode 100644 llvm/lib/TableGen/Unison.cpp
 create mode 100755 llvm/lib/TableGen/rununi.sh

diff --git a/llvm/include/llvm/TableGen/Unison.h b/llvm/include/llvm/TableGen/Unison.h
new file mode 100644
index 0000000000000..5617ac7506387
--- /dev/null
+++ b/llvm/include/llvm/TableGen/Unison.h
@@ -0,0 +1,153 @@
+//===- llvm/TableGen/Unison.h - Unison tool ---------------------*- C++ -*-===//
+//
+//  Main authors:
+//    Jan Tomljanovic <jan.tomljanovic at sics.se>
+//    Roberto Castaneda Lozano <roberto.castaneda at ri.se>
+//
+//  This file is part of Unison, see http://unison-code.github.io
+//
+//  Copyright (c) 2016, RISE SICS AB
+//  All rights reserved.
+//
+//  Redistribution and use in source and binary forms, with or without
+//  modification, are permitted provided that the following conditions are met:
+//  1. Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//  2. Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//  3. Neither the name of the copyright holder nor the names of its
+//     contributors may be used to endorse or promote products derived from this
+//     software without specific prior written permission.
+//
+//  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+//  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+//  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+//  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+//  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+//  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+//  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+//  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+//  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+//  POSSIBILITY OF SUCH DAMAGE.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Extraction of the following information about each instruction for Unison:
+///   - id (opcode)
+///   - type (linear, call, or branch)
+///   - operands (including use/def information and reg. class, if applicable)
+///   - size
+///   - side effects (including memory reads and writes)
+///   - itinerary
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TABLEGEN_UNISON_H
+#define LLVM_TABLEGEN_UNISON_H
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+// An Operand can be a Register, Label or a Bound (any other Operand that is not
+// interpreted by Unison, such as immediates). If it is a register, UseDef and
+// RegType are defined.
+namespace unison {
+
+struct Operand {
+  enum { Register, Label, Bound } Type; // Kind of operand.
+  std::string Name;    // Operand identifier, printed as the entry's key.
+  std::string UseDef;  // "use", "def" or "usedef".
+  std::string RegType; // Name of the record giving the operand's type.
+};
+
+} // end namespace unison
+
+using StringPair = std::pair<std::string, std::string>;
+using StringPairVector = std::vector<StringPair>;
+using StringVector = std::vector<std::string>;
+using OperandVector = std::vector<unison::Operand>;
+
+/// Instruction with methods to be printed in .yaml format.
+class Instruction {
+private:
+  std::string Id;           // Printed as the "id" attribute.
+  std::string Type;         // Printed as the "type" attribute.
+  OperandVector Operands;   // Entries of the "operands" attribute.
+  StringVector Uses;        // Names listed under "uses".
+  StringVector Defs;        // Names listed under "defines".
+  int Size;                 // Printed as the "size" attribute.
+  bool AffectsMem;          // Adds a memory entry to "affects".
+  bool AffectedMem;         // Adds a memory entry to "affected-by".
+  StringVector AffectsReg;  // Registers listed under "affects".
+  StringVector AffectedReg; // Registers listed under "affected-by".
+  std::string Itinerary;    // Printed as the "itinerary" attribute.
+
+  void printAffs(llvm::raw_ostream &OS, std::string Name, bool Memory,
+                 StringVector Regs); // Used by printAffects/printAffected.
+  void printUseDefs(llvm::raw_ostream &OS, StringVector UseDefs,
+                    std::string Name); // Used by printUses/printDefs.
+  void printAttribute(std::string Name, std::string Value,
+                      llvm::raw_ostream &OS); // One "key value" line.
+  void printField(std::string Name, std::string Value, llvm::raw_ostream &OS);
+
+public:
+  Instruction(std::string Id, std::string Type, OperandVector Operands,
+              StringVector Uses, StringVector Defs, int Size, bool AffectsMem,
+              bool AffectedMem, StringVector AffectsReg,
+              StringVector AffectedReg, std::string Itinerary);
+  void printId(llvm::raw_ostream &OS);
+  void printType(llvm::raw_ostream &OS);
+  void printOperands(llvm::raw_ostream &OS);
+  void printUses(llvm::raw_ostream &OS);
+  void printDefs(llvm::raw_ostream &OS);
+  void printSize(llvm::raw_ostream &OS);
+  void printAffects(llvm::raw_ostream &OS);
+  void printAffected(llvm::raw_ostream &OS);
+  void printItinerary(llvm::raw_ostream &OS);
+  void printAll(llvm::raw_ostream &OS); // Calls every print method above.
+};
+
+namespace llvm {
+
+/// \brief outputs information for Unison.
+///
+/// Prints extracted information for the Unison compiler as a valid
+/// .yaml file.
+/// \param OS output stream to which it prints the .yaml file.
+/// \param Records structure that holds all the information about the
+/// data which TableGen tool has.
+void EmitUnisonFile(const RecordKeeper &Records, raw_ostream &OS);
+
+StringVector flat(const Record *Rec);
+void printYaml(std::vector<Instruction> Instructions, raw_ostream &OS);
+std::string getRecordItinerary(Record *Rec);
+StringVector getRegisterList(std::string Field, Record *Rec);
+bool getRecordBool(Record *Rec, std::string Field, bool Def);
+int getRecordSize(Record *Rec);
+StringPairVector *parseOperands(std::string Field, Record *Rec);
+StringVector getNames(StringPairVector *List);
+void executeConstraints(StringPairVector *Outs, std::string Cons);
+OperandVector getOperands(StringPairVector *Outs, StringPairVector *ins,
+                          const RecordKeeper &Records);
+void getOperandsFromVector(StringPairVector *Vec, StringPairVector *Help,
+                           OperandVector *Operands, bool Defs,
+                           const RecordKeeper &Records);
+bool isRegister(const Record *Rec);
+bool isLabel(const Record *Rec);
+std::string getRecordType(Record *Rec);
+std::string getRecordId(Record *Rec);
+bool fieldExists(Record *Rec, std::string Field);
+bool allNeededFieldsExist(Record *Rec);
+StringVector split(std::string Str, char Del);
+std::string trim(std::string Str);
+std::string escape(std::string Name);
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/TableGen/CMakeLists.txt b/llvm/lib/TableGen/CMakeLists.txt
index 0f9284c8bb999..1456ddb2eaacf 100644
--- a/llvm/lib/TableGen/CMakeLists.txt
+++ b/llvm/lib/TableGen/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_component_library(LLVMTableGen
   TGLexer.cpp
   TGParser.cpp
   TGTimer.cpp
+  Unison.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/TableGen
diff --git a/llvm/lib/TableGen/Unison.cpp b/llvm/lib/TableGen/Unison.cpp
new file mode 100644
index 0000000000000..680890d52f881
--- /dev/null
+++ b/llvm/lib/TableGen/Unison.cpp
@@ -0,0 +1,453 @@
+//===- Unison.cpp - Unison tool implementation ----------------------------===//
+//
+//  Main authors:
+//    Jan Tomljanovic <jan.tomljanovic at sics.se>
+//    Roberto Castaneda Lozano <roberto.castaneda at ri.se>
+//
+//  This file is part of Unison, see http://unison-code.github.io
+//
+//  Copyright (c) 2016, RISE SICS AB
+//  All rights reserved.
+//
+//  Redistribution and use in source and binary forms, with or without
+//  modification, are permitted provided that the following conditions are met:
+//  1. Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//  2. Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//  3. Neither the name of the copyright holder nor the names of its
+//     contributors may be used to endorse or promote products derived from this
+//     software without specific prior written permission.
+//
+//  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+//  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+//  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+//  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+//  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+//  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+//  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+//  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+//  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+//  POSSIBILITY OF SUCH DAMAGE.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Extraction of the following information about each instruction for Unison:
+///   - id (opcode)
+///   - type (linear, call, or branch)
+///   - operands (including use/def information and reg. class, if applicable)
+///   - size
+///   - side effects (including memory reads and writes)
+///   - itinerary
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Unison.h"
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
+#include <set>
+#include <sstream>
+
+Instruction::Instruction(std::string Id0, std::string Type0,
+                         OperandVector Operands0, StringVector Uses0,
+                         StringVector Defs0, int Size0, bool AffectsMem0,
+                         bool AffectedMem0, StringVector AffectsReg0,
+                         StringVector AffectedReg0, std::string Itinerary0)
+    : Id(Id0), Type(Type0), Operands(Operands0), Uses(Uses0), Defs(Defs0),
+      Size(Size0), AffectsMem(AffectsMem0), AffectedMem(AffectedMem0),
+      AffectsReg(AffectsReg0), AffectedReg(AffectedReg0),
+      Itinerary(Itinerary0) {} // Stores every argument as-is; no validation.
+
+void Instruction::printId(llvm::raw_ostream &OS) {
+  // 8 columns of indent, then "- id:" left-justified in a 22-column field.
+  std::ostringstream Line;
+  Line << std::setw(8) << ' ' << std::left << std::setw(22) << "- id:" << Id;
+  OS << Line.str() << '\n';
+}
+
+void Instruction::printType(llvm::raw_ostream &OS) {
+  printAttribute("type:", Type, OS); // Emits the "type:" attribute line.
+}
+
+void Instruction::printOperands(llvm::raw_ostream &OS) {
+  // Header line of the attribute, indented by 10 columns.
+  std::ostringstream Header;
+  Header << std::setw(10) << " "
+         << "operands:" << '\n';
+  OS << Header.str();
+
+  // One entry per operand, keyed by the operand name. Labels and bounds
+  // print their kind only; registers also print their use/def role and
+  // their register type.
+  for (const unison::Operand &Opnd : Operands) {
+    std::string Kind;
+    if (Opnd.Type == unison::Operand::Label)
+      Kind = "label";
+    else if (Opnd.Type == unison::Operand::Bound)
+      Kind = "bound";
+    else if (Opnd.Type == unison::Operand::Register)
+      Kind = "[register, " + Opnd.UseDef + ", " + Opnd.RegType + "]";
+    printField(Opnd.Name, Kind, OS);
+  }
+}
+
+void Instruction::printUseDefs(llvm::raw_ostream &OS, StringVector UseDefs,
+                               std::string Name) {
+  // Render the names as a bracketed, comma-separated list: "[a, b, c]".
+  std::string List = "[";
+  for (size_t Idx = 0; Idx < UseDefs.size(); ++Idx) {
+    if (Idx != 0)
+      List += ", ";
+    List += UseDefs[Idx];
+  }
+  printAttribute(Name + ":", List + "]", OS);
+}
+
+void Instruction::printUses(llvm::raw_ostream &OS) {
+  printUseDefs(OS, Uses, "uses"); // Emits the "uses:" attribute line.
+}
+
+void Instruction::printDefs(llvm::raw_ostream &OS) {
+  printUseDefs(OS, Defs, "defines"); // Emits the "defines:" attribute line.
+}
+
+void Instruction::printSize(llvm::raw_ostream &OS) {
+  printAttribute("size:", std::to_string(Size), OS); // Size as decimal text.
+}
+
+void Instruction::printAffects(llvm::raw_ostream &OS) {
+  printAffs(OS, "affects", AffectsMem, AffectsReg); // "affects:" section.
+}
+
+void Instruction::printAffected(llvm::raw_ostream &OS) {
+  printAffs(OS, "affected-by", AffectedMem, AffectedReg); // "affected-by:"
+}
+
+void Instruction::printAffs(llvm::raw_ostream &OS, std::string Name,
+                            bool Memory, StringVector Regs) {
+  std::ostringstream Header; // "<Name>:" line, indented by 10 columns.
+  Header << std::setw(10) << " " << Name << ":" << '\n';
+  OS << Header.str();
+  if (Memory) // The memory entry, if any, precedes the registers.
+    printField("mem", "memory", OS);
+  for (const std::string &RegName : Regs)
+    printField(RegName, "register", OS);
+}
+
+void Instruction::printItinerary(llvm::raw_ostream &OS) {
+  printAttribute("itinerary:", Itinerary, OS); // Key only if it is empty.
+}
+
+void Instruction::printAll(llvm::raw_ostream &OS) {
+  OS << "\n"; // Blank line in front of every instruction entry.
+  printId(OS);
+  printType(OS);
+  printOperands(OS);
+  printUses(OS);
+  printDefs(OS);
+  printSize(OS);
+  printAffects(OS);
+  printAffected(OS);
+  printItinerary(OS);
+}
+
+/// Prints a simple attribute: \p Name alone when \p Value is empty, otherwise
+/// \p Name left-justified in a 20-column field followed by \p Value.
+void Instruction::printAttribute(std::string Name, std::string Value,
+                                 llvm::raw_ostream &OS) {
+  std::ostringstream Line;
+  Line << std::setw(10) << " ";
+  if (!Value.empty())
+    Line << std::setw(20) << std::left;
+  Line << Name << Value << '\n';
+  OS << Line.str();
+}
+
+/// Prints one "- Name: Value" sub-element of a complex attribute.
+void Instruction::printField(std::string Name, std::string Value,
+                             llvm::raw_ostream &OS) {
+  const std::string Key = "- " + Name + ": ";
+  std::ostringstream Line;
+  Line << std::setw(11) << " ";
+  Line << std::left << std::setw(19) << Key << Value << '\n';
+  OS << Line.str();
+}
+
+namespace llvm {
+
+void EmitUnisonFile(const RecordKeeper &Records, raw_ostream &OS) {
+  std::vector<Instruction> Instructions;
+  for (const auto &D : Records.getDefs()) {
+    Record *Rec = &(*D.second);
+    if (!allNeededFieldsExist(Rec))
+      continue;
+    std::string Id = getRecordId(Rec);
+    std::string Type = getRecordType(Rec);
+    // parseOperands() returns heap lists owned here; they are freed below.
+    StringPairVector *OutList = parseOperands("OutOperandList", Rec);
+    StringPairVector *InList = parseOperands("InOperandList", Rec);
+    executeConstraints(OutList, Rec->getValueAsString("Constraints").str());
+    StringVector Uses = getNames(InList);
+    StringVector Defs = getNames(OutList);
+    OperandVector Operands = getOperands(OutList, InList, Records);
+    delete OutList;
+    delete InList;
+    Instructions.emplace_back(
+        Id, Type, Operands, Uses, Defs, getRecordSize(Rec),
+        /*AffectsMem=*/getRecordBool(Rec, "mayStore", false),
+        /*AffectedMem=*/getRecordBool(Rec, "mayLoad", false),
+        /*AffectsReg=*/getRegisterList("Defs", Rec),
+        /*AffectedReg=*/getRegisterList("Uses", Rec), getRecordItinerary(Rec));
+  }
+  printYaml(Instructions, OS);
+}
+
+/// Prints the YAML header and then every instruction to \p OS .
+void printYaml(std::vector<Instruction> Instructions, raw_ostream &OS) {
+  OS << "---\ninstruction-set:\n\n";
+  std::stringstream Buffer;
+  Buffer << std::setw(3) << " "
+         << "- group: allInstructions\n";
+  Buffer << std::setw(5) << " "
+         << "instructions:\n\n";
+  OS << Buffer.str();
+  // Iterate by reference: the print methods are non-const, and copying an
+  // Instruction would duplicate all of its strings and vectors.
+  for (Instruction &In : Instructions)
+    In.printAll(OS);
+}
+
+/// Returns the escaped register names found in the \p Field attribute of the
+/// given Record \p Rec . Assumes the \p Field is a list.
+StringVector getRegisterList(std::string Field, Record *Rec) {
+  StringVector Names;
+  for (auto Elem : *(Rec->getValueAsListInit(Field)))
+    Names.push_back(escape(Elem->getAsString()));
+  return Names;
+}
+
+/// Returns the name of the def referenced by the "Itinerary" field of \p Rec .
+std::string getRecordItinerary(Record *Rec) {
+  return Rec->getValueAsDef("Itinerary")->getName().str();
+}
+
+/// Returns the integer stored in the "Size" field of the record \p Rec .
+int getRecordSize(Record *Rec) { return Rec->getValueAsInt("Size"); }
+
+/// Returns the bit stored in \p Field of the record \p Rec , or the default
+/// \p Def when that field is left unset.
+bool getRecordBool(Record *Rec, std::string Field, bool Def) {
+  bool IsUnset = false;
+  const bool Bit = Rec->getValueAsBitOrUnset(Field, IsUnset);
+  return IsUnset ? Def : Bit;
+}
+
+/// Gets operands of the given field from the record. Makes pairs <Type, Name>
+/// where Type gives the type of the register, or immediate value, or label; and
+/// Name is the identifier given to that register/value/label (like src1).
+/// The returned vector is heap-allocated and owned by the caller.
+StringPairVector *parseOperands(std::string Field, Record *Rec) {
+  const DagInit *Dag = Rec->getValueAsDag(Field);
+  StringPairVector *Ret = new StringPairVector;
+  for (unsigned I = 0, E = Dag->getNumArgs(); I < E; ++I) {
+    // NOTE(review): the cast is unchecked; assumes every argument is a def.
+    DefInit *Def = (DefInit *)Dag->getArg(I);
+    StringVector Types = flat(Def->getDef());
+    // getArgNameStr() is "" for unnamed arguments; getArgName() would be null.
+    std::string ArgName = Dag->getArgNameStr(I).str();
+    for (size_t J = 0, K = Types.size(); J < K; ++J) {
+      std::string Name = K == 1 ? ArgName : ArgName + std::to_string(J + 1);
+      // Variadic operands always get the same fixed name.
+      if (Types[J] == "variable_ops")
+        Name = "variable";
+      Ret->push_back(StringPair(Types[J], escape(Name)));
+    }
+  }
+  return Ret;
+}
+
+/// Flattens an operand into the names of its leaf suboperands by recursing
+/// through "MIOperandInfo". An operand without that field, or with an empty
+/// suboperand dag, yields a single-element list holding its own name.
+StringVector flat(const Record *Rec) {
+  const RecordVal *Info = Rec->getValue("MIOperandInfo");
+  // The record has no MIOperandInfo field at all.
+  if (Info == nullptr)
+    return StringVector(1, Rec->getNameInitAsString());
+  DagInit *SubOps = (DagInit *)Info->getValue();
+  // The suboperand dag exists but is empty.
+  if (SubOps->getNumArgs() == 0)
+    return StringVector(1, Rec->getNameInitAsString());
+  StringVector Leaves;
+  // A suboperand may itself consist of suboperands, hence the recursion.
+  for (auto It = SubOps->arg_begin(), End = SubOps->arg_end(); It != End;
+       ++It) {
+    StringVector Nested = flat(((DefInit *)*It)->getDef());
+    Leaves.insert(Leaves.end(), Nested.begin(), Nested.end());
+  }
+  return Leaves;
+}
+
+/// Projects the list of <Type, Name> pairs in \p List onto its names.
+StringVector getNames(StringPairVector *List) {
+  StringVector Result;
+  for (const StringPair &Entry : *List)
+    Result.push_back(Entry.second);
+  return Result;
+}
+
+/// Swaps the two operand names of each constraint in \p Cons within \p Outs .
+void executeConstraints(StringPairVector *Outs, std::string Cons) {
+  if (Cons.empty())
+    return;
+  for (const std::string &Raw : split(Cons, ',')) {
+    const std::string Constraint = trim(Raw);
+    if (Constraint.find("@earlyclobber") == 0)
+      continue; // Not an operand substitution.
+    StringVector Sides = split(Constraint, '=');
+    assert(Sides.size() == 2 &&
+           "A constraint should involve exactly two operands");
+    const std::string Lhs = escape(trim(Sides[0]).substr(1)); // Drop 1st char.
+    const std::string Rhs = escape(trim(Sides[1]).substr(1));
+    for (StringPair &Out : *Outs)
+      if (Out.second == Lhs)
+        Out.second = Rhs;
+      else if (Out.second == Rhs)
+        Out.second = Lhs;
+  }
+}
+
+/// Builds the full operand list: the output operands \p Outs first, followed
+/// by the input operands \p Ins .
+OperandVector getOperands(StringPairVector *Outs, StringPairVector *Ins,
+                          const RecordKeeper &Records) {
+  OperandVector All;
+  getOperandsFromVector(Outs, Ins, &All, /*Defs=*/true, Records);
+  getOperandsFromVector(Ins, Outs, &All, /*Defs=*/false, Records);
+  return All;
+}
+
+/// Appends the operands of \p Vec to \p Operands , skipping names that are
+/// already present. \p Defs tells whether \p Vec holds outputs; an operand
+/// that also appears in \p Help (the opposite list) is marked "usedef".
+void getOperandsFromVector(StringPairVector *Vec, StringPairVector *Help,
+                           OperandVector *Operands, bool Defs,
+                           const RecordKeeper &Records) {
+  for (const StringPair &Pair : *Vec) {
+    // Skip operands that were already emitted under the same name.
+    bool Seen = false;
+    for (const unison::Operand &Prev : *Operands)
+      if (Prev.Name == Pair.second) {
+        Seen = true;
+        break;
+      }
+    if (Seen)
+      continue;
+
+    // A stack value replaces the old `new`, which leaked once per operand.
+    unison::Operand Op;
+    Op.Name = Pair.second;
+    std::string UseDefF = Defs ? "def" : "use";
+    if (std::find(Help->begin(), Help->end(), Pair) != Help->end())
+      UseDefF = Defs ? "use" + UseDefF : UseDefF + "def";
+    Op.UseDef = UseDefF;
+    Op.RegType = Pair.first;
+    const Record *Def = Records.getDef(Op.RegType);
+    if (isRegister(Def))
+      Op.Type = unison::Operand::Register;
+    else if (isLabel(Def))
+      Op.Type = unison::Operand::Label;
+    else
+      Op.Type = unison::Operand::Bound;
+    Operands->push_back(Op);
+  }
+}
+
+bool isLabel(const Record *Rec) {
+  // Gets ValueType; a null record (unknown operand type) is never a label.
+  const RecordVal *Val = Rec ? Rec->getValue("Type") : nullptr;
+  if (Val == nullptr)
+    return false;
+  DefInit *Def = (DefInit *)Val->getValue();
+  // Supposedly the mark for the label.
+  return Def->getAsString() == "OtherVT";
+}
+
+bool isRegister(const Record *Rec) {
+  if (Rec == nullptr)
+    return false; // Callers pass the result of a lookup that may fail.
+  if (Rec->isSubClassOf("PointerLikeRegClass"))
+    return true;
+  for (auto Super : Rec->getSuperClasses())
+    // Superclass names that suggest that the object is a register.
+    for (auto Name :
+         {"RegisterClass", "Register", "RegisterOperand", "RegisterTuples"})
+      if (Super->getName() == Name)
+        return true;
+  return false;
+}
+
+/// Returns the string that describes the type of the record: "call", "branch"
+/// (branches and returns) or "linear" (everything else).
+std::string getRecordType(Record *Rec) {
+  if (getRecordBool(Rec, "isCall", false))
+    return "call";
+  // Calls were handled above, so only branch-like records remain special.
+  const bool Branches = getRecordBool(Rec, "isBranch", false) ||
+                        getRecordBool(Rec, "isReturn", false);
+  return Branches ? "branch" : "linear";
+}
+
+std::string getRecordId(Record *Rec) { return Rec->getName().str(); }
+
+/// Checks whether all attributes of the given record \p Rec are present for
+/// the record to be analyzed as an instruction.
+bool allNeededFieldsExist(Record *Rec) {
+  const std::string Needed[] = {
+      "isCall", "isBranch", "Constraints", "OutOperandList", "InOperandList",
+      "Size", "mayLoad", "mayStore", "Itinerary", "isReturn", "Uses", "Defs"};
+  // Stops at the first missing attribute.
+  return std::all_of(std::begin(Needed), std::end(Needed),
+                     [&](const std::string &F) { return fieldExists(Rec, F); });
+}
+
+/// Returns true if the record \p Rec has an attribute named \p Field .
+bool fieldExists(Record *Rec, std::string Field) {
+  return Rec->getValue(Field) != nullptr;
+}
+
+/// Splits the string \p Str at every occurrence of the delimiter \p Del and
+/// returns the pieces in order.
+StringVector split(std::string Str, char Del) {
+  StringVector Pieces;
+  std::istringstream Input(Str);
+  // std::getline yields no final empty piece, so "a," splits into {"a"}.
+  for (std::string Piece; std::getline(Input, Piece, Del);)
+    Pieces.push_back(Piece);
+  return Pieces;
+}
+
+/// Returns \p Str without its leading and trailing whitespace.
+std::string trim(std::string Str) {
+  const char *const Blank = " \n\t\r";
+  const size_t Last = Str.find_last_not_of(Blank);
+  Str.erase(Last + 1); // npos + 1 wraps to 0: an all-blank string empties.
+  Str.erase(0, Str.find_first_not_of(Blank));
+  return Str;
+}
+
+/// Escapes YAML reserved words in \p Name by appending a single quote.
+std::string escape(std::string Name) {
+  static const std::set<std::string> Reserved(
+      {"true", "false", "n", "y", "yes", "no", "on", "off"});
+  std::string Lname(Name);
+  // Go through unsigned char: ::tolower is undefined for negative values.
+  std::transform(Lname.begin(), Lname.end(), Lname.begin(),
+                 [](unsigned char C) { return (char)::tolower(C); });
+  return Reserved.count(Lname) ? Name + "'" : Name;
+}
+
+} // end namespace llvm
diff --git a/llvm/lib/TableGen/rununi.sh b/llvm/lib/TableGen/rununi.sh
new file mode 100755
index 0000000000000..37ccc5e56df69
--- /dev/null
+++ b/llvm/lib/TableGen/rununi.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+LLVM_DIR=${LLVM_DIR:-/home/mlevental/dev_projects/llvm-project/llvm}
+TARGET=${TARGET:-AMDGPU}
+./bin/llvm-tblgen -unison "$LLVM_DIR/lib/Target/$TARGET/$TARGET.td" \
+  -I "$LLVM_DIR/include" -I "$LLVM_DIR/lib/Target/$TARGET" -o "$TARGET.yaml"
diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp
index edb7791500699..429e044935eb5 100644
--- a/llvm/utils/TableGen/Basic/TableGen.cpp
+++ b/llvm/utils/TableGen/Basic/TableGen.cpp
@@ -20,6 +20,7 @@
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/SetTheory.h"
 #include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/TableGen/Unison.h"
 #include <cassert>
 #include <string>
 #include <vector>
@@ -63,6 +64,7 @@ static TableGen::Emitter::Opt X[] = {
     {"null-backend", [](const RecordKeeper &Records, raw_ostream &OS) {},
      "Do nothing after parsing (useful for timing)"},
     {"dump-json", EmitJSON, "Dump all records as machine-readable JSON"},
+    {"unison", EmitUnisonFile, "emit unison"},
     {"print-enums", printEnums, "Print enum values for a class"},
     {"print-sets", printSets, "Print expanded sets for testing DAG exprs"},
 };

>From 498197aef8a6e75fd75e25dc3c1db80f18b64c4e Mon Sep 17 00:00:00 2001
From: max <maksim.levental at gmail.com>
Date: Sat, 5 Jul 2025 21:07:49 -0400
Subject: [PATCH 2/2] add llc-unison

---
 llvm/include/llvm/CodeGen/Passes.h      |   3 +
 llvm/include/llvm/InitializePasses.h    |   1 +
 llvm/lib/CodeGen/CMakeLists.txt         |   1 +
 llvm/lib/CodeGen/CodeGen.cpp            |   1 +
 llvm/lib/CodeGen/MIRParser/MILexer.cpp  |   4 +
 llvm/lib/CodeGen/MIRParser/MILexer.h    |   4 +
 llvm/lib/CodeGen/MIRParser/MIParser.cpp |  41 +++++++
 llvm/lib/CodeGen/MIRPrinter.cpp         |  55 +++++++++
 llvm/lib/CodeGen/MIRPrintingPass.cpp    |   6 +
 llvm/lib/CodeGen/UnisonMIRPrepare.cpp   | 142 ++++++++++++++++++++++++
 llvm/tools/llc-unison/CMakeLists.txt    |   3 +
 llvm/tools/llc-unison/llc-unison.py     |  97 ++++++++++++++++
 12 files changed, 358 insertions(+)
 create mode 100644 llvm/lib/CodeGen/UnisonMIRPrepare.cpp
 create mode 100644 llvm/tools/llc-unison/CMakeLists.txt
 create mode 100644 llvm/tools/llc-unison/llc-unison.py

diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 990452fa11fec..34c55bca56d8c 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -463,6 +463,9 @@ LLVM_ABI extern char &MachineCFGPrinterID;
 /// LiveDebugValues pass
 LLVM_ABI extern char &LiveDebugValuesID;
 
+/// UnisonMIRPrepare - This pass prepares for printing Unison-style MIR.
+LLVM_ABI extern char &UnisonMIRPrepareID;
+
 /// InterleavedAccess Pass - This pass identifies and matches interleaved
 /// memory accesses to target specific intrinsics.
 ///
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 1b5b1d5888824..afe64dae31d21 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -339,6 +339,7 @@ LLVM_ABI void initializeWasmEHPreparePass(PassRegistry &);
 LLVM_ABI void initializeWinEHPreparePass(PassRegistry &);
 LLVM_ABI void initializeWriteBitcodePassPass(PassRegistry &);
 LLVM_ABI void initializeXRayInstrumentationLegacyPass(PassRegistry &);
+LLVM_ABI void initializeUnisonMIRPreparePass(PassRegistry&);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 5dd6413431255..2e75bc2885e26 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -244,6 +244,7 @@ add_llvm_component_library(LLVMCodeGen
   TargetSubtargetInfo.cpp
   TwoAddressInstructionPass.cpp
   TypePromotion.cpp
+  UnisonMIRPrepare.cpp
   UnreachableBlockElim.cpp
   ValueTypes.cpp
   VLIWMachineScheduler.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 5250534d8a4e4..46df223cb68fd 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -146,4 +146,5 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeWasmEHPreparePass(Registry);
   initializeWinEHPreparePass(Registry);
   initializeXRayInstrumentationLegacyPass(Registry);
+  initializeUnisonMIRPreparePass(Registry);
 }
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 7153902fe2e7a..730d0a19d3bf1 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -257,15 +257,19 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("dereferenceable", MIToken::kw_dereferenceable)
       .Case("invariant", MIToken::kw_invariant)
       .Case("align", MIToken::kw_align)
+      .Case("freq", MIToken::kw_freq)
       .Case("basealign", MIToken::kw_basealign)
       .Case("addrspace", MIToken::kw_addrspace)
       .Case("stack", MIToken::kw_stack)
       .Case("got", MIToken::kw_got)
       .Case("jump-table", MIToken::kw_jump_table)
       .Case("constant-pool", MIToken::kw_constant_pool)
+      .Case("unknown", MIToken::kw_unknown)
       .Case("call-entry", MIToken::kw_call_entry)
       .Case("custom", MIToken::kw_custom)
       .Case("liveout", MIToken::kw_liveout)
+      .Case("liveouts", MIToken::kw_liveouts)
+      .Case("exit", MIToken::kw_exit)
       .Case("landing-pad", MIToken::kw_landing_pad)
       .Case("inlineasm-br-indirect-target",
             MIToken::kw_inlineasm_br_indirect_target)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index d7cd06759cfbb..43ee02aa44eac 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -113,6 +113,7 @@ struct MIToken {
     kw_non_temporal,
     kw_invariant,
     kw_align,
+    kw_freq, // Unison MIR style extension.
     kw_basealign,
     kw_addrspace,
     kw_stack,
@@ -120,8 +121,11 @@ struct MIToken {
     kw_jump_table,
     kw_constant_pool,
     kw_call_entry,
+    kw_unknown, // Unison MIR style extension.
     kw_custom,
     kw_liveout,
+    kw_liveouts,
+    kw_exit, // Unison MIR style extension.
     kw_landing_pad,
     kw_inlineasm_br_indirect_target,
     kw_ehfunclet_entry,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 3a364d5ff0d20..90e037bfca0e5 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -445,6 +445,8 @@ class MIParser {
   parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
   bool parseBasicBlock(MachineBasicBlock &MBB,
                        MachineBasicBlock *&AddFalthroughFrom);
+  // Unison MIR style extension: list of basic block live-out registers.
+  bool parseBasicBlockLiveouts();
   bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
   bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
 
@@ -751,6 +753,13 @@ bool MIParser::parseBasicBlockDefinition(
         if (parseAlignment(Alignment))
           return true;
         break;
+      case MIToken::kw_freq:
+        // Unison MIR style extension: basic block execution frequency.
+          lex();
+        if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+          return error("expected an integer literal");
+        lex();
+        break;
       case MIToken::IRBlock:
       case MIToken::NamedIRBlock:
         // TODO: Report an error when both name and ir block are specified.
@@ -888,6 +897,25 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
   return false;
 }
 
+/// Unison MIR style extension: parses a basic block's 'liveouts:' line.
+///
+/// Consumes the 'liveouts' keyword, the colon that must follow it and an
+/// optional comma-separated list of named registers. Every register is passed
+/// through parseRegister(), but the parsed value is not stored anywhere.
+///
+/// \returns false once the list has been consumed; otherwise the result of
+/// the failing helper or of error().
+bool MIParser::parseBasicBlockLiveouts() {
+  assert(Token.is(MIToken::kw_liveouts));
+  lex();
+  if (expectAndConsume(MIToken::colon))
+    return true;
+  // The list may be empty. Otherwise keep reading registers for as long as
+  // each one is followed by a comma.
+  for (bool More = !Token.isNewlineOrEOF(); More;
+       More = consumeIfPresent(MIToken::comma)) {
+    if (Token.isNot(MIToken::NamedRegister))
+      return error("expected a named register");
+    Register LiveOut{0};
+    VRegInfo *Info;
+    if (parseRegister(LiveOut, Info))
+      return true;
+    lex();
+  }
+  return false;
+}
+
 bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
   assert(Token.is(MIToken::kw_successors));
   lex();
@@ -949,6 +977,11 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
     } else if (Token.is(MIToken::kw_liveins)) {
       if (parseBasicBlockLiveins(MBB))
         return true;
+    } else if (Token.is(MIToken::kw_liveouts)) {
+      if (parseBasicBlockLiveouts())
+        return true;
+    } else if (Token.is(MIToken::kw_exit)) {
+      lex();
     } else if (consumeIfPresent(MIToken::Newline)) {
       continue;
     } else
@@ -3285,6 +3318,14 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
 }
 
 bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
+  // Unison MIR style extension: accept "unknown" pseudo-values, just return a
+  // null pointer.
+  if (Token.is(MIToken::kw_unknown)) {
+    lex();
+    const Value *V = nullptr;
+    Dest = MachinePointerInfo(V);
+    return false;
+  }
   if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
       Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
       Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) ||
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 7710b503facc3..e9f8a38390356 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -70,6 +70,8 @@ static cl::opt<bool> SimplifyMIR(
 static cl::opt<bool> PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true),
                                     cl::desc("Print MIR debug-locations"));
 
+extern cl::opt<bool> UnisonMIR;
+
 namespace {
 
 /// This structure describes how to print out stack object references.
@@ -688,6 +690,19 @@ void printMBB(raw_ostream &OS, MFPrintState &State,
   OS << ":\n";
 
   bool HasLineAttributes = false;
+
+  if (UnisonMIR) {
+    // In Unison MIR style, the first instruction of each block contains the
+    // block's estimated execution frequency as a metadata operand. The
+    // instruction is emitted, but Unison is expected to disregard it.
+    OS << (HasLineAttributes ? ", " : " (");
+    auto MO = MBB.instr_begin()->operands_begin();
+    auto MD = MO->getMetadata()->getOperand(1).get();
+    auto MV = cast<ConstantAsMetadata>(MD)->getValue();
+    OS << "freq " << MV->getUniqueInteger();
+    HasLineAttributes = true;
+  }
+
   // Print the successors
   bool canPredictProbs = MBB.canPredictBranchProbabilities();
   // Even if the list of successors is empty, if we cannot guess it,
@@ -704,6 +719,12 @@ void printMBB(raw_ostream &OS, MFPrintState &State,
     ListSeparator LS;
     for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
       OS << LS << printMBBReference(**I);
+      // The Unison style uses a simpler formatting of the probabilities.
+      if (UnisonMIR && (!SimplifyMIR || !canPredictProbs))
+        OS << '('
+           << MBB.getSuccProbability(I).scale(100)
+           << ')';
+      else // Intentional indention to reduce merge conflicts.
       if (!SimplifyMIR || !canPredictProbs)
         OS << format("(0x%08" PRIx32 ")",
                      MBB.getSuccProbability(I).getNumerator());
@@ -727,6 +748,35 @@ void printMBB(raw_ostream &OS, MFPrintState &State,
     HasLineAttributes = true;
   }
 
+  if (UnisonMIR) {
+    // In the Unison style we print the live out registers. If there are no
+    // registers live-out, the marker still provides the information that the
+    // function actually returns (which is important e.g. to implement calling
+    // conventions).
+    if (MBB.isReturnBlock()) {
+      const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+      const MachineInstr &I = MBB.back();
+      OS.indent(2) << "liveouts:";
+      std::string Sep = " ";
+      // We assume that I's implicit uses correspond to the live out registers
+      // while the explicit uses are just common operands (such as the return
+      // address, predicate operands, etc.).
+      for (auto MO : I.uses())
+        if (MO.isReg() && MO.isImplicit()) {
+          OS << Sep << printReg(MO.getReg(), &TRI);
+          Sep = ", ";
+        }
+      OS << "\n";
+      HasLineAttributes = true;
+    }
+    // Print the 'exit' marker for basic blocks that exit but do not return to
+    // their caller function.
+    if (MBB.succ_empty() && (MBB.empty() || !MBB.back().isReturn())) {
+      OS.indent(2) << "exit\n";
+      HasLineAttributes = true;
+    }
+  }
+
   if (HasLineAttributes && !MBB.empty())
     OS << "\n";
   bool IsInBundle = false;
@@ -918,6 +968,11 @@ static void printMIOperand(raw_ostream &OS, MFPrintState &State,
   case MachineOperand::MO_BlockAddress:
   case MachineOperand::MO_DbgInstrRef:
   case MachineOperand::MO_ShuffleMask: {
+    // Unison expects metadata operands in a raw format.
+    if (UnisonMIR && Op.getType() == MachineOperand::MO_Metadata) {
+      OS << *(Op.getMetadata());
+      break;
+    }
     unsigned TiedOperandIdx = 0;
     if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
       TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
diff --git a/llvm/lib/CodeGen/MIRPrintingPass.cpp b/llvm/lib/CodeGen/MIRPrintingPass.cpp
index 28aeb7f116c6c..eff7315212d41 100644
--- a/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -14,12 +14,16 @@
 #include "llvm/CodeGen/MIRPrinter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/UnisonMIRPrepare.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
 
+cl::opt<bool> UnisonMIR("unison-mir", cl::desc("Print MIR in Unison style"));
+
 PreservedAnalyses PrintMIRPreparePass::run(Module &M, ModuleAnalysisManager &) {
   printMIR(OS, M);
   return PreservedAnalyses::all();
@@ -52,6 +56,8 @@ struct MIRPrintingPass : public MachineFunctionPass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
+    if (UnisonMIR)
+      AU.addRequired<UnisonMIRPrepare>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
diff --git a/llvm/lib/CodeGen/UnisonMIRPrepare.cpp b/llvm/lib/CodeGen/UnisonMIRPrepare.cpp
new file mode 100644
index 0000000000000..e01b69caca8ea
--- /dev/null
+++ b/llvm/lib/CodeGen/UnisonMIRPrepare.cpp
@@ -0,0 +1,142 @@
+//===-- UnisonMIRPrepare.cpp - Unison-style MIR printing preparation --=======//
+//
+//  Main authors:
+//    Roberto Castaneda Lozano <roberto.castaneda at ri.se>
+//
+//  This file is part of Unison, see http://unison-code.github.io
+//
+//  Copyright (c) 2017, RISE SICS AB
+//  All rights reserved.
+//
+//  Redistribution and use in source and binary forms, with or without
+//  modification, are permitted provided that the following conditions are met:
+//  1. Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//  2. Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//  3. Neither the name of the copyright holder nor the names of its
+//     contributors may be used to endorse or promote products derived from this
+//     software without specific prior written permission.
+//
+//  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+//  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+//  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+//  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+//  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+//  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+//  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+//  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+//  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+//  POSSIBILITY OF SUCH DAMAGE.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implementation of the UnisonMIRPrepare pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/UnisonMIRPrepare.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/IR/MDBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "unison-mir-prepare"
+
+INITIALIZE_PASS_BEGIN(UnisonMIRPrepare, DEBUG_TYPE,
+                      "Unison-style MIR printing preparation", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(UnisonMIRPrepare, DEBUG_TYPE,
+                    "Unison-style MIR printing preparation", true, true)
+
+char UnisonMIRPrepare::ID = 0;
+
+/// Creates a two-element metadata tuple made of the string \p Tag followed by
+/// \p Val as a 64-bit integer constant.
+///
+/// The node is created in the LLVMContext of the IR function behind \p MF.
+MDNode *createMDTaggedTuple(MachineFunction &MF, std::string Tag,
+                            uint64_t Val) {
+  LLVMContext &Ctx = MF.getFunction().getContext();
+  MDBuilder MDB(Ctx);
+  auto *TagMD = MDB.createString(Tag);
+  auto *ValMD =
+      MDB.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx), Val));
+  return MDNode::get(Ctx, {TagMD, ValMD});
+}
+
+/// Constructs the pass and initializes it in the registry returned by
+/// PassRegistry::getPassRegistry().
+UnisonMIRPrepare::UnisonMIRPrepare() : MachineFunctionPass(ID) {
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeUnisonMIRPreparePass(Registry);
+}
+
+/// Declares what this pass needs and preserves: it requires block frequency
+/// information and alias analysis results (both fetched in
+/// runOnMachineFunction()) and is declared as CFG-preserving.
+void UnisonMIRPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+  AU.addRequired<AAResultsWrapperPass>();
+  // Let the base class add its own requirements on top.
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Annotates every basic block of \p MF with its execution frequency and tags
+/// the block's memory accesses with memory partition ids.
+///
+/// \returns true if \p MF contains at least one basic block, false otherwise.
+bool UnisonMIRPrepare::runOnMachineFunction(MachineFunction &MF) {
+  // Refresh the per-function state shared by the annotate* helpers.
+  TII = MF.getSubtarget().getInstrInfo();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  if (MF.empty())
+    return false;
+  for (MachineBasicBlock &Block : MF) {
+    annotateFrequency(Block);
+    annotateMemoryPartitions(Block);
+  }
+  // annotateFrequency() inserts an instruction into each visited block.
+  return true;
+}
+
+/// Inserts an ANNOTATION_LABEL instruction at the very beginning of \p MBB
+/// whose metadata operand is the tuple
+/// !{!"unison-block-frequency", i64 <freq>}, where <freq> is the frequency
+/// that the block frequency analysis reports for \p MBB.
+void UnisonMIRPrepare::annotateFrequency(MachineBasicBlock &MBB) {
+  const uint64_t BlockFreq = MBFI->getMBFI().getBlockFreq(&MBB).getFrequency();
+  MDNode *FreqMD = createMDTaggedTuple(*MBB.getParent(),
+                                       "unison-block-frequency", BlockFreq);
+  // The label is built with a default-constructed (empty) debug location.
+  const auto &LabelDesc = TII->get(TargetOpcode::ANNOTATION_LABEL);
+  BuildMI(MBB, MBB.instr_begin(), DebugLoc(), LabelDesc).addMetadata(FreqMD);
+}
+
+/// Tags every unbundled memory access in \p MBB with the id of its memory
+/// partition.
+///
+/// Memory accesses start out in singleton sets. Two accesses end up in the
+/// same set when at least one of them may store and MachineInstr::mayAlias()
+/// reports that they may alias. The resulting sets are numbered, starting from
+/// 0 for every block, and the number is appended to each member instruction as
+/// the metadata operand !{!"unison-memory-partition", i64 <id>}.
+void UnisonMIRPrepare::annotateMemoryPartitions(MachineBasicBlock &MBB) {
+  // The instructions of interest: unbundled ones that may load or store.
+  auto IsMemAccess = [](const MachineInstr &Inst) {
+    return !Inst.isBundle() && (Inst.mayStore() || Inst.mayLoad());
+  };
+
+  // Start with one singleton set per memory access in the block.
+  MemAccessPartition Partitions;
+  for (auto &MI : MBB)
+    if (IsMemAccess(MI))
+      Partitions.insert(&MI);
+
+  // Compare all pairs and merge the sets of those which may alias. This goes
+  // through 'MachineInstr::mayAlias', the same interface to 'AliasAnalysis'
+  // that 'ScheduleDAGInstrs::addChainDependency' uses, so the assumptions
+  // documented for 'MachineInstr::mayAlias' apply here as well.
+  for (const auto &Lhs : Partitions)
+    for (const auto &Rhs : Partitions) {
+      const auto &First = Lhs->getData();
+      const auto &Second = Rhs->getData();
+      if (!First->mayStore() && !Second->mayStore())
+        continue;
+      if (First->mayAlias(AA, *Second, true))
+        Partitions.unionSets(First, Second);
+    }
+
+  // Give each set (visited through its leader) the next free id and record
+  // that id in the memory partition map for all members of the set.
+  unsigned int NextId = 0;
+  for (const auto &Class : Partitions) {
+    if (!Class->isLeader())
+      continue;
+    for (MemAccessPartition::member_iterator Member =
+             Partitions.member_begin(*Class);
+         Member != Partitions.member_end(); ++Member)
+      MP[*Member] = NextId;
+    ++NextId;
+  }
+
+  // Append the partition id as a metadata operand to each memory access.
+  MachineFunction &MF = *MBB.getParent();
+  for (auto &MI : MBB) {
+    if (!IsMemAccess(MI))
+      continue;
+    MDNode *MD =
+        createMDTaggedTuple(MF, "unison-memory-partition", MP.at(&MI));
+    MI.addOperand(MF, MachineOperand::CreateMetadata(MD));
+  }
+}
diff --git a/llvm/tools/llc-unison/CMakeLists.txt b/llvm/tools/llc-unison/CMakeLists.txt
new file mode 100644
index 0000000000000..31b0f323d0ecc
--- /dev/null
+++ b/llvm/tools/llc-unison/CMakeLists.txt
@@ -0,0 +1,3 @@
+# The script is added to the tree as llc-unison.py; install that file under
+# the name llc-unison.
+install(PROGRAMS llc-unison.py
+  DESTINATION ${LLVM_TOOLS_INSTALL_DIR}
+  RENAME llc-unison
+  COMPONENT llc-unison)
\ No newline at end of file
diff --git a/llvm/tools/llc-unison/llc-unison.py b/llvm/tools/llc-unison/llc-unison.py
new file mode 100644
index 0000000000000..cb5e958586e7e
--- /dev/null
+++ b/llvm/tools/llc-unison/llc-unison.py
@@ -0,0 +1,97 @@
+#! /usr/bin/python
+#
+#  Main authors:
+#    Roberto Castaneda Lozano <roberto.castaneda at ri.se>
+#
+#  This file is part of Unison, see http://unison-code.github.io
+#
+#  Copyright (c) 2018, RISE SICS AB
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#  1. Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#  2. Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#  3. Neither the name of the copyright holder nor the names of its contributors
+#     may be used to endorse or promote products derived from this software
+#     without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#  POSSIBILITY OF SUCH DAMAGE.
+#
+
+# llc-unison: script to run llc with Unison
+#
+# Runs llc twice to generate Unison's input, then Unison itself, and then llc
+# again to emit the generated assembly code. Has the same interface as llc
+# itself, plus a few additional flags to control Unison.
+
+import argparse
+import os
+import shlex
+import subprocess
+import sys
+import tempfile
+
+def execute(cmd):
+    print(" ".join(cmd))
+    subprocess.call(cmd)
+    return
+
+def temp_filename(ext):
+    return tempfile.NamedTemporaryFile(suffix=ext).name
+
+# Intercept input file, output file, and Unison flags
+parser = argparse.ArgumentParser(description='Run llc with Unison. The option -o must be given.')
+parser.add_argument('infile', metavar='INPUT', help='input file')
+parser.add_argument('-o', metavar='OUTPUT', help='output file')
+parser.add_argument('--uni-flags', help='flags to be passed to Unison')
+(args, llc_flags) = parser.parse_known_args()
+
+exit_pass  = "phi-node-elimination"
+entry_pass = "funclet-layout"
+
+# Expect 'llc' in the same directory
+llc = os.path.join(os.path.dirname(sys.argv[0]), "llc")
+# Expect 'uni' in the PATH
+uni = "uni"
+
+# Generate main input to Unison (.ll -> .mir)
+
+mir = temp_filename('.mir')
+cmd_mir = [llc] + llc_flags + \
+          ["-stop-before", exit_pass, "-unison-mir", "-o", mir, args.infile]
+execute(cmd_mir)
+
+# Generate initial solution for Unison (.ll -> .asm.mir)
+
+asm_mir = temp_filename('.asm.mir')
+cmd_asm_mir = [llc] + llc_flags + \
+              ["-stop-before", entry_pass, "-unison-mir", "-o", asm_mir, args.infile]
+execute(cmd_asm_mir)
+
+# Run Unison (.mir -> .asm.mir -> .unison.mir)
+
+unison_mir = temp_filename('.unison.mir')
+cmd_uni = [uni, "run", "--llvm6", "--verbose"] + \
+          ["-o", unison_mir, mir, "--basefile=" + asm_mir]
+if args.uni_flags is not None:
+    cmd_uni += [args.uni_flags]
+execute(cmd_uni)
+
+# Generate assembly code (.unison.mir -> .s)
+
+cmd_s = [llc] + llc_flags + \
+        ["-start-before", entry_pass, "-o", args.o, unison_mir]
+execute(cmd_s)
\ No newline at end of file



More information about the llvm-commits mailing list