[clang-tools-extra] [clang] [llvm] [LLVM] Add IRNormalizer Pass (PR #68176)

Sat Nov 4 04:36:19 PDT 2023

================
@@ -0,0 +1,637 @@
+//===--------------- IRNormalizer.cpp - IR Normalizer ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the IRNormalizer class which aims to transform LLVM
+/// Modules into a canonical form by reordering and renaming instructions while
+/// preserving the same semantics. The normalizer makes it easier to spot
+/// semantic differences while diffing two modules which have undergone
+/// different passes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/IRNormalizer.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils.h"
+#include <algorithm>
+#include <vector>
+
+#define DEBUG_TYPE "normalize"
+
+using namespace llvm;
+
+namespace {
+/// IRNormalizer aims to transform LLVM IR into canonical form.
+class IRNormalizer {
+public:
+  /// \name Normalizer flags.
+  /// @{
+  /// Preserves original order of instructions.
+  static cl::opt<bool> PreserveOrder;
+  /// Renames all instructions (including user-named).
+  static cl::opt<bool> RenameAll;
+  /// Folds all regular instructions (including pre-outputs).
+  static cl::opt<bool> FoldPreoutputs;
+  /// Sorts and reorders operands in commutative instructions.
+  static cl::opt<bool> ReorderOperands;
+  /// @}
+
+  bool runOnFunction(Function &F);
+
+private:
+  // Random constant for hashing, so the state isn't zero.
+  const uint64_t MagicHashConstant = 0x6acaa36bef8325c5ULL;
+  DenseSet<const Instruction *> NamedInstructions;
+
+  /// \name Naming.
+  /// @{
+  void nameFunctionArguments(Function &F);
+  void nameBasicBlocks(Function &F);
+  void nameInstruction(Instruction *I);
+  void nameAsInitialInstruction(Instruction *I);
+  void nameAsRegularInstruction(Instruction *I);
+  void foldInstructionName(Instruction *I);
+  /// @}
+
+  /// \name Reordering.
+  /// @{
+  void reorderInstructions(SmallVector<Instruction *, 16> &Outputs);
+  void reorderInstruction(Instruction *Used, Instruction *User,
+                          SmallPtrSet<const Instruction *, 32> &Visited);
+  void reorderInstructionOperandsByNames(Instruction *I);
+  void reorderPHIIncomingValues(PHINode *PN);
+  /// @}
+
+  /// \name Utility methods.
+  /// @{
+  SmallVector<Instruction *, 16> collectOutputInstructions(Function &F);
+  bool isOutput(const Instruction *I);
+  bool isInitialInstruction(const Instruction *I);
+  bool hasOnlyImmediateOperands(const Instruction *I);
+  SetVector<int>
+  getOutputFootprint(Instruction *I,
+                     SmallPtrSet<const Instruction *, 32> &Visited);
+  /// @}
+};
+} // namespace
+
+cl::opt<bool> IRNormalizer::PreserveOrder(
+    "preserve-order", cl::Hidden,
+    cl::desc("Preserves original instruction order"));
+cl::opt<bool> IRNormalizer::RenameAll(
+    "rename-all", cl::Hidden,
+    cl::desc("Renames all instructions (including user-named)"));
+cl::opt<bool> IRNormalizer::FoldPreoutputs(
+    "fold-all", cl::Hidden,
+    cl::desc("Folds all regular instructions (including pre-outputs)"));
+cl::opt<bool> IRNormalizer::ReorderOperands(
+    "reorder-operands", cl::Hidden,
+    cl::desc("Sorts and reorders operands in commutative instructions"));
+
+/// Entry method to the IRNormalizer.
+///
+/// \param M Module to normalize.
+bool IRNormalizer::runOnFunction(Function &F) {
+  nameFunctionArguments(F);
+  nameBasicBlocks(F);
+
+  SmallVector<Instruction *, 16> Outputs = collectOutputInstructions(F);
+
+  if (!PreserveOrder)
+    reorderInstructions(Outputs);
+
+  for (auto &I : Outputs)
+    nameInstruction(I);
+
+  for (auto &I : instructions(F)) {
+    if (!PreserveOrder) {
+      if (ReorderOperands && I.isCommutative())
+        reorderInstructionOperandsByNames(&I);
+
+      if (auto *PN = dyn_cast<PHINode>(&I))
+        reorderPHIIncomingValues(PN);
+    }
+
+    foldInstructionName(&I);
+  }
+
+  return true;
+}
+
+/// Numbers arguments.
+///
+/// \param F Function whose arguments will be renamed.
+void IRNormalizer::nameFunctionArguments(Function &F) {
+  int ArgumentCounter = 0;
+  for (auto &A : F.args()) {
+    if (RenameAll || A.getName().empty()) {
+      A.setName("a" + Twine(ArgumentCounter));
+      ++ArgumentCounter;
+    }
+  }
+}
+
+/// Names basic blocks using a generated hash for each basic block in
+/// a function considering the opcode and the order of output instructions.
+///
+/// \param F Function containing basic blocks to rename.
+void IRNormalizer::nameBasicBlocks(Function &F) {
+  for (auto &B : F) {
+    // Initialize to a magic constant, so the state isn't zero.
+    uint64_t Hash = MagicHashConstant;
+
+    // Hash considering output instruction opcodes.
+    for (auto &I : B)
+      if (isOutput(&I))
+        Hash = hashing::detail::hash_16_bytes(Hash, I.getOpcode());
+
+    if (RenameAll || B.getName().empty()) {
+      // Name basic block. Substring hash to make diffs more readable.
+      B.setName("bb" + std::to_string(Hash).substr(0, 5));
+    }
+  }
+}
+
+/// Names instructions graphically (recursive) in accordance with the
+/// def-use tree, starting from the initial instructions (defs), finishing at
+/// the output (top-most user) instructions (depth-first).
+///
+/// \param I Instruction to be renamed.
+void IRNormalizer::nameInstruction(Instruction *I) {
+  // Ensure instructions are not renamed. This is done
+  // to prevent situation where instructions are used
+  // before their definition (in phi nodes)
+  if (NamedInstructions.contains(I))
+    return;
+  NamedInstructions.insert(I);
+  // Determine the type of instruction to name.
+  if (isInitialInstruction(I)) {
+    // This is an initial instruction.
----------------
nikic wrote:

This comment and the one below just repeat the method names :)

https://github.com/llvm/llvm-project/pull/68176