[llvm] [clang-tools-extra] [clang] [LLVM] Add IRNormalizer Pass (PR #68176)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 4 04:36:17 PDT 2023
================
@@ -0,0 +1,637 @@
+//===--------------- IRNormalizer.cpp - IR Normalizer ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the IRNormalizer class which aims to transform LLVM
+/// Modules into a canonical form by reordering and renaming instructions while
+/// preserving the same semantics. The normalizer makes it easier to spot
+/// semantic differences while diffing two modules which have undergone
+/// different passes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/IRNormalizer.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils.h"
+#include <algorithm>
+#include <vector>
+
+#define DEBUG_TYPE "normalize"
+
+using namespace llvm;
+
+namespace {
+/// IRNormalizer aims to transform LLVM IR into canonical form.
+class IRNormalizer {
+public:
+ /// \name Normalizer flags.
+ /// @{
+ /// Preserves original order of instructions.
+ static cl::opt<bool> PreserveOrder;
+ /// Renames all instructions (including user-named).
+ static cl::opt<bool> RenameAll;
+ /// Folds all regular instructions (including pre-outputs).
+ static cl::opt<bool> FoldPreoutputs;
+ /// Sorts and reorders operands in commutative instructions.
+ static cl::opt<bool> ReorderOperands;
+ /// @}
+
+ bool runOnFunction(Function &F);
+
+private:
+ // Random constant for hashing, so the state isn't zero.
+ const uint64_t MagicHashConstant = 0x6acaa36bef8325c5ULL;
+ DenseSet<const Instruction *> NamedInstructions;
+
+ /// \name Naming.
+ /// @{
+ void nameFunctionArguments(Function &F);
+ void nameBasicBlocks(Function &F);
+ void nameInstruction(Instruction *I);
+ void nameAsInitialInstruction(Instruction *I);
+ void nameAsRegularInstruction(Instruction *I);
+ void foldInstructionName(Instruction *I);
+ /// @}
+
+ /// \name Reordering.
+ /// @{
+ void reorderInstructions(SmallVector<Instruction *, 16> &Outputs);
+ void reorderInstruction(Instruction *Used, Instruction *User,
+ SmallPtrSet<const Instruction *, 32> &Visited);
+ void reorderInstructionOperandsByNames(Instruction *I);
+ void reorderPHIIncomingValues(PHINode *PN);
+ /// @}
+
+ /// \name Utility methods.
+ /// @{
+ SmallVector<Instruction *, 16> collectOutputInstructions(Function &F);
+ bool isOutput(const Instruction *I);
+ bool isInitialInstruction(const Instruction *I);
+ bool hasOnlyImmediateOperands(const Instruction *I);
+ SetVector<int>
+ getOutputFootprint(Instruction *I,
+ SmallPtrSet<const Instruction *, 32> &Visited);
+ /// @}
+};
+} // namespace
+
+cl::opt<bool> IRNormalizer::PreserveOrder(
+ "preserve-order", cl::Hidden,
+ cl::desc("Preserves original instruction order"));
+cl::opt<bool> IRNormalizer::RenameAll(
+ "rename-all", cl::Hidden,
+ cl::desc("Renames all instructions (including user-named)"));
+cl::opt<bool> IRNormalizer::FoldPreoutputs(
+ "fold-all", cl::Hidden,
+ cl::desc("Folds all regular instructions (including pre-outputs)"));
+cl::opt<bool> IRNormalizer::ReorderOperands(
+ "reorder-operands", cl::Hidden,
+ cl::desc("Sorts and reorders operands in commutative instructions"));
+
+/// Entry method to the IRNormalizer.
+///
+/// \param M Module to normalize.
+bool IRNormalizer::runOnFunction(Function &F) {
+ nameFunctionArguments(F);
+ nameBasicBlocks(F);
+
+ SmallVector<Instruction *, 16> Outputs = collectOutputInstructions(F);
+
+ if (!PreserveOrder)
+ reorderInstructions(Outputs);
+
+ for (auto &I : Outputs)
+ nameInstruction(I);
+
+ for (auto &I : instructions(F)) {
+ if (!PreserveOrder) {
+ if (ReorderOperands && I.isCommutative())
+ reorderInstructionOperandsByNames(&I);
+
+ if (auto *PN = dyn_cast<PHINode>(&I))
+ reorderPHIIncomingValues(PN);
+ }
+
+ foldInstructionName(&I);
+ }
+
+ return true;
+}
+
+/// Numbers arguments.
+///
+/// \param F Function whose arguments will be renamed.
+void IRNormalizer::nameFunctionArguments(Function &F) {
+ int ArgumentCounter = 0;
+ for (auto &A : F.args()) {
+ if (RenameAll || A.getName().empty()) {
+ A.setName("a" + Twine(ArgumentCounter));
+ ++ArgumentCounter;
+ }
+ }
+}
+
+/// Names basic blocks using a generated hash for each basic block in
+/// a function considering the opcode and the order of output instructions.
+///
+/// \param F Function containing basic blocks to rename.
+void IRNormalizer::nameBasicBlocks(Function &F) {
+ for (auto &B : F) {
+ // Initialize to a magic constant, so the state isn't zero.
+ uint64_t Hash = MagicHashConstant;
+
+ // Hash considering output instruction opcodes.
+ for (auto &I : B)
+ if (isOutput(&I))
+ Hash = hashing::detail::hash_16_bytes(Hash, I.getOpcode());
+
+ if (RenameAll || B.getName().empty()) {
+ // Name basic block. Substring hash to make diffs more readable.
+ B.setName("bb" + std::to_string(Hash).substr(0, 5));
+ }
+ }
+}
+
+/// Names instructions graphically (recursive) in accordance with the
+/// def-use tree, starting from the initial instructions (defs), finishing at
+/// the output (top-most user) instructions (depth-first).
+///
+/// \param I Instruction to be renamed.
+void IRNormalizer::nameInstruction(Instruction *I) {
+ // Ensure instructions are not renamed. This is done
+ // to prevent situation where instructions are used
+ // before their definition (in phi nodes)
+ if (NamedInstructions.contains(I))
+ return;
+ NamedInstructions.insert(I);
+ // Determine the type of instruction to name.
+ if (isInitialInstruction(I)) {
+ // This is an initial instruction.
+ nameAsInitialInstruction(I);
+ } else {
+ // This must be a regular instruction.
+ nameAsRegularInstruction(I);
+ }
+}
+
+/// Names instruction following the scheme:
+/// vl00000Callee(Operands)
+///
+/// Where 00000 is a hash calculated considering instruction's opcode and output
+/// footprint. Callee's name is only included when instruction's type is
+/// CallInst. In cases where instruction is commutative, operands list is also
+/// sorted.
+///
+/// Renames instruction only when RenameAll flag is raised or instruction is
+/// unnamed.
+///
+/// \see getOutputFootprint()
+/// \param I Instruction to be renamed.
+void IRNormalizer::nameAsInitialInstruction(Instruction *I) {
+ if (I->getType()->isVoidTy() || (!I->getName().empty() && !RenameAll))
+ return;
+
+ // Instruction operands for further sorting.
+ SmallVector<SmallString<64>, 4> Operands;
+
+ // Collect operands.
+ for (auto &OP : I->operands()) {
+ if (!isa<Function>(OP)) {
+ std::string TextRepresentation;
+ raw_string_ostream Stream(TextRepresentation);
+ OP->printAsOperand(Stream, false);
+ Operands.push_back(StringRef(Stream.str()));
+ }
+ }
+
+ if (I->isCommutative())
----------------
nikic wrote:
isCommutative() only applies to the first two operands, not all of them.
https://github.com/llvm/llvm-project/pull/68176
More information about the llvm-commits
mailing list