[llvm] [llvm-mca] Add bottle-neck analysis to JSON output. (PR #90056)

Phil Camp via llvm-commits llvm-commits at lists.llvm.org
Fri May 17 06:28:24 PDT 2024


https://github.com/FlameTop updated https://github.com/llvm/llvm-project/pull/90056

>From 010509461d609899836ac4d554bcd42d37351e79 Mon Sep 17 00:00:00 2001
From: Phil Camp <phil.camp at sony.com>
Date: Thu, 25 Apr 2024 14:43:18 +0100
Subject: [PATCH 1/4] Add bottle-neck analysis to JSON output.

---
 .../llvm-mca/JSON/X86/views-bottleneck.s      | 166 ++++++++++++++++++
 .../llvm-mca/Views/BottleneckAnalysis.cpp     |  46 +++++
 .../tools/llvm-mca/Views/BottleneckAnalysis.h |   3 +-
 3 files changed, 214 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s

diff --git a/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s b/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
new file mode 100644
index 0000000000000..2267e623b5e99
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
@@ -0,0 +1,166 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --timeline --all-stats --all-views < %s | FileCheck %s
+vaddps %xmm0, %xmm0, %xmm1
+vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+
+# CHECK:      {
+# CHECK-NEXT:   "CodeRegions": [
+# CHECK-NEXT:     {
+# CHECK-NEXT:       "BottleneckAnalysis": {
+# CHECK-NEXT:         "DataDependencyCycles": 0,
+# CHECK-NEXT:         "DependencyEdge": [
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "FromID": 1,
+# CHECK-NEXT:             "ResourceOrRegID": 128,
+# CHECK-NEXT:             "ToID": 3,
+# CHECK-NEXT:             "Type": 3
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "FromID": 3,
+# CHECK-NEXT:             "ResourceOrRegID": 128,
+# CHECK-NEXT:             "ToID": 5,
+# CHECK-NEXT:             "Type": 3
+# CHECK-NEXT:           }
+# CHECK-NEXT:         ],
+# CHECK-NEXT:         "MemoryDependencyCycles": 0,
+# CHECK-NEXT:         "PressureIncreaseCycles": 56,
+# CHECK-NEXT:         "RegisterDependencyCycles": 0,
+# CHECK-NEXT:         "ResourcePressure": [
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "HWPort5": 56
+# CHECK-NEXT:           }
+# CHECK-NEXT:         ],
+# CHECK-NEXT:         "ResourcePressureCycles": 56,
+# CHECK-NEXT:         "TotalCycles": 209
+# CHECK-NEXT:       },
+# CHECK-NEXT:       "DispatchStatistics": {
+# CHECK-NEXT:         "GROUP": 0,
+# CHECK-NEXT:         "LQ": 0,
+# CHECK-NEXT:         "RAT": 0,
+# CHECK-NEXT:         "RCU": 13,
+# CHECK-NEXT:         "SCHEDQ": 0,
+# CHECK-NEXT:         "SQ": 0,
+# CHECK-NEXT:         "USH": 0
+# CHECK-NEXT:       },
+# CHECK-NEXT:       "InstructionInfoView": {
+# CHECK-NEXT:         "InstructionList": [
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "Instruction": 0,
+# CHECK-NEXT:             "Latency": 3,
+# CHECK-NEXT:             "NumMicroOpcodes": 1,
+# CHECK-NEXT:             "RThroughput": 1,
+# CHECK-NEXT:             "hasUnmodeledSideEffects": false,
+# CHECK-NEXT:             "mayLoad": false,
+# CHECK-NEXT:             "mayStore": false
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "Instruction": 1,
+# CHECK-NEXT:             "Latency": 8,
+# CHECK-NEXT:             "NumMicroOpcodes": 3,
+# CHECK-NEXT:             "RThroughput": 2,
+# CHECK-NEXT:             "hasUnmodeledSideEffects": false,
+# CHECK-NEXT:             "mayLoad": true,
+# CHECK-NEXT:             "mayStore": false
+# CHECK-NEXT:           }
+# CHECK-NEXT:         ]
+# CHECK-NEXT:       },
+# CHECK-NEXT:       "Instructions": [
+# CHECK-NEXT:         "vaddps\t%xmm0, %xmm0, %xmm1",
+# CHECK-NEXT:         "vblendvps\t%xmm1, (%rdi), %xmm2, %xmm3"
+# CHECK-NEXT:       ],
+# CHECK-NEXT:       "Name": "",
+# CHECK-NEXT:       "ResourcePressureView": {
+# CHECK-NEXT:         "ResourcePressureInfo": [
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "InstructionIndex": 0,
+# CHECK-NEXT:             "ResourceIndex": 3,
+# CHECK-NEXT:             "ResourceUsage": 1
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "InstructionIndex": 1,
+# CHECK-NEXT:             "ResourceIndex": 4,
+# CHECK-NEXT:             "ResourceUsage": 0.5
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "InstructionIndex": 1,
+# CHECK-NEXT:             "ResourceIndex": 5,
+# CHECK-NEXT:             "ResourceUsage": 0.5
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "InstructionIndex": 1,
+# CHECK-NEXT:             "ResourceIndex": 7,
+# CHECK-NEXT:             "ResourceUsage": 2
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "InstructionIndex": 2,
+# CHECK-NEXT:             "ResourceIndex": 3,
+# CHECK-NEXT:             "ResourceUsage": 1
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "InstructionIndex": 2,
+# CHECK-NEXT:             "ResourceIndex": 4,
+# CHECK-NEXT:             "ResourceUsage": 0.5
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "InstructionIndex": 2,
+# CHECK-NEXT:             "ResourceIndex": 5,
+# CHECK-NEXT:             "ResourceUsage": 0.5
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "InstructionIndex": 2,
+# CHECK-NEXT:             "ResourceIndex": 7,
+# CHECK-NEXT:             "ResourceUsage": 2
+# CHECK-NEXT:           }
+# CHECK-NEXT:         ]
+# CHECK-NEXT:       },
+# CHECK-NEXT:       "SummaryView": {
+# CHECK-NEXT:         "BlockRThroughput": 2,
+# CHECK-NEXT:         "DispatchWidth": 4,
+# CHECK-NEXT:         "IPC": 0.9569377990430622,
+# CHECK-NEXT:         "Instructions": 200,
+# CHECK-NEXT:         "Iterations": 100,
+# CHECK-NEXT:         "TotalCycles": 209,
+# CHECK-NEXT:         "TotaluOps": 400,
+# CHECK-NEXT:         "uOpsPerCycle": 1.9138755980861244
+# CHECK-NEXT:       },
+# CHECK-NEXT:       "TimelineView": {
+# CHECK-NEXT:         "TimelineInfo": [
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "CycleDispatched": 0,
+# CHECK-NEXT:             "CycleExecuted": 4,
+# CHECK-NEXT:             "CycleIssued": 1,
+# CHECK-NEXT:             "CycleReady": 0,
+# CHECK-NEXT:             "CycleRetired": 5
+# CHECK-NEXT:           },
+# CHECK-NEXT:           {
+# CHECK-NEXT:             "CycleDispatched": 0,
+# CHECK-NEXT:             "CycleExecuted": 9,
+# CHECK-NEXT:             "CycleIssued": 1,
+# CHECK-NEXT:             "CycleReady": 1,
+# CHECK-NEXT:             "CycleRetired": 10
+# CHECK-NEXT:           }
+# CHECK-NEXT:         ]
+# CHECK-NEXT:       }
+# CHECK-NEXT:     }
+# CHECK-NEXT:   ],
+# CHECK-NEXT:   "SimulationParameters": {
+# CHECK-NEXT:     "-march": "x86_64",
+# CHECK-NEXT:     "-mcpu": "haswell",
+# CHECK-NEXT:     "-mtriple": "x86_64-unknown-unknown"
+# CHECK-NEXT:   },
+# CHECK-NEXT:   "TargetInfo": {
+# CHECK-NEXT:     "CPUName": "haswell",
+# CHECK-NEXT:     "Resources": [
+# CHECK-NEXT:       "HWDivider",
+# CHECK-NEXT:       "HWFPDivider",
+# CHECK-NEXT:       "HWPort0",
+# CHECK-NEXT:       "HWPort1",
+# CHECK-NEXT:       "HWPort2",
+# CHECK-NEXT:       "HWPort3",
+# CHECK-NEXT:       "HWPort4",
+# CHECK-NEXT:       "HWPort5",
+# CHECK-NEXT:       "HWPort6",
+# CHECK-NEXT:       "HWPort7"
+# CHECK-NEXT:     ]
+# CHECK-NEXT:   }
+# CHECK-NEXT: }
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 409a7010b80c8..ece5b6709a2e4 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -641,5 +641,51 @@ void BottleneckAnalysis::printView(raw_ostream &OS) const {
   printCriticalSequence(OS);
 }
 
+json::Value BottleneckAnalysis::toJSON() const {
+  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
+    json::Object JO({{"PressureIncreaseCycles", 0}});
+    return JO;
+  }
+
+  json::Array CriticalSequence;
+  // Collect the dependency edges that form the critical sequence.
+  SmallVector<const DependencyEdge *, 16> Seq;
+  DG.getCriticalSequence(Seq);
+  if (!Seq.empty()) {
+    for (const DependencyEdge *&DE : Seq) {
+      json::Object DEJO({{"FromID", DE->FromIID},
+                         {"ToID", DE->ToIID},
+                         {"Type", static_cast<unsigned>(DE->Dep.Type)},
+                         {"ResourceOrRegID", DE->Dep.ResourceOrRegID}});
+      CriticalSequence.push_back(std::move(DEJO));
+    }
+  }
+
+  json::Array ResourcePressure;
+  if (BPI.PressureIncreaseCycles) {
+    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+    const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
+      unsigned ReleaseAtCycles = Distribution[I];
+      if (ReleaseAtCycles) {
+        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
+        json::Object RPJO({{PRDesc.Name, ReleaseAtCycles}});
+        ResourcePressure.push_back(std::move(RPJO));
+      }
+    }
+  }
+
+  json::Object JO({{"PressureIncreaseCycles", BPI.PressureIncreaseCycles},
+                   {"ResourcePressureCycles", BPI.ResourcePressureCycles},
+                   {"DataDependencyCycles", BPI.DataDependencyCycles},
+                   {"RegisterDependencyCycles", BPI.RegisterDependencyCycles},
+                   {"MemoryDependencyCycles", BPI.MemoryDependencyCycles},
+                   {"TotalCycles", TotalCycles},
+                   {"DependencyEdge", std::move(CriticalSequence)},
+                   {"ResourcePressure", std::move(ResourcePressure)}});
+
+  return JO;
+}
+
 } // namespace mca.
 } // namespace llvm
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
index e709b25c3f768..529090cf543fc 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -335,7 +335,8 @@ class BottleneckAnalysis : public InstructionView {
 
   void printView(raw_ostream &OS) const override;
   StringRef getNameAsString() const override { return "BottleneckAnalysis"; }
-  bool isSerializable() const override { return false; }
+  bool isSerializable() const override { return true; }
+  json::Value toJSON() const override;
 
 #ifndef NDEBUG
   void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }

>From 731bc9a032a98e2c65797bba1732fbbda16f9c1d Mon Sep 17 00:00:00 2001
From: Phil Camp <phil.camp at sony.com>
Date: Fri, 26 Apr 2024 13:24:28 +0100
Subject: [PATCH 2/4] Removed non-bottleneck views from test

---
 .../llvm-mca/JSON/X86/views-bottleneck.s      | 95 +------------------
 1 file changed, 1 insertion(+), 94 deletions(-)

diff --git a/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s b/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
index 2267e623b5e99..c8ad60ee54209 100644
--- a/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
+++ b/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --timeline --all-stats --all-views < %s | FileCheck %s
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --bottleneck-analysis --resource-pressure=false --instruction-info=false < %s | FileCheck %s
 vaddps %xmm0, %xmm0, %xmm1
 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
 
@@ -33,86 +33,11 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
 # CHECK-NEXT:         "ResourcePressureCycles": 56,
 # CHECK-NEXT:         "TotalCycles": 209
 # CHECK-NEXT:       },
-# CHECK-NEXT:       "DispatchStatistics": {
-# CHECK-NEXT:         "GROUP": 0,
-# CHECK-NEXT:         "LQ": 0,
-# CHECK-NEXT:         "RAT": 0,
-# CHECK-NEXT:         "RCU": 13,
-# CHECK-NEXT:         "SCHEDQ": 0,
-# CHECK-NEXT:         "SQ": 0,
-# CHECK-NEXT:         "USH": 0
-# CHECK-NEXT:       },
-# CHECK-NEXT:       "InstructionInfoView": {
-# CHECK-NEXT:         "InstructionList": [
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "Instruction": 0,
-# CHECK-NEXT:             "Latency": 3,
-# CHECK-NEXT:             "NumMicroOpcodes": 1,
-# CHECK-NEXT:             "RThroughput": 1,
-# CHECK-NEXT:             "hasUnmodeledSideEffects": false,
-# CHECK-NEXT:             "mayLoad": false,
-# CHECK-NEXT:             "mayStore": false
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "Instruction": 1,
-# CHECK-NEXT:             "Latency": 8,
-# CHECK-NEXT:             "NumMicroOpcodes": 3,
-# CHECK-NEXT:             "RThroughput": 2,
-# CHECK-NEXT:             "hasUnmodeledSideEffects": false,
-# CHECK-NEXT:             "mayLoad": true,
-# CHECK-NEXT:             "mayStore": false
-# CHECK-NEXT:           }
-# CHECK-NEXT:         ]
-# CHECK-NEXT:       },
 # CHECK-NEXT:       "Instructions": [
 # CHECK-NEXT:         "vaddps\t%xmm0, %xmm0, %xmm1",
 # CHECK-NEXT:         "vblendvps\t%xmm1, (%rdi), %xmm2, %xmm3"
 # CHECK-NEXT:       ],
 # CHECK-NEXT:       "Name": "",
-# CHECK-NEXT:       "ResourcePressureView": {
-# CHECK-NEXT:         "ResourcePressureInfo": [
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "InstructionIndex": 0,
-# CHECK-NEXT:             "ResourceIndex": 3,
-# CHECK-NEXT:             "ResourceUsage": 1
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "InstructionIndex": 1,
-# CHECK-NEXT:             "ResourceIndex": 4,
-# CHECK-NEXT:             "ResourceUsage": 0.5
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "InstructionIndex": 1,
-# CHECK-NEXT:             "ResourceIndex": 5,
-# CHECK-NEXT:             "ResourceUsage": 0.5
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "InstructionIndex": 1,
-# CHECK-NEXT:             "ResourceIndex": 7,
-# CHECK-NEXT:             "ResourceUsage": 2
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "InstructionIndex": 2,
-# CHECK-NEXT:             "ResourceIndex": 3,
-# CHECK-NEXT:             "ResourceUsage": 1
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "InstructionIndex": 2,
-# CHECK-NEXT:             "ResourceIndex": 4,
-# CHECK-NEXT:             "ResourceUsage": 0.5
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "InstructionIndex": 2,
-# CHECK-NEXT:             "ResourceIndex": 5,
-# CHECK-NEXT:             "ResourceUsage": 0.5
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "InstructionIndex": 2,
-# CHECK-NEXT:             "ResourceIndex": 7,
-# CHECK-NEXT:             "ResourceUsage": 2
-# CHECK-NEXT:           }
-# CHECK-NEXT:         ]
-# CHECK-NEXT:       },
 # CHECK-NEXT:       "SummaryView": {
 # CHECK-NEXT:         "BlockRThroughput": 2,
 # CHECK-NEXT:         "DispatchWidth": 4,
@@ -122,24 +47,6 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
 # CHECK-NEXT:         "TotalCycles": 209,
 # CHECK-NEXT:         "TotaluOps": 400,
 # CHECK-NEXT:         "uOpsPerCycle": 1.9138755980861244
-# CHECK-NEXT:       },
-# CHECK-NEXT:       "TimelineView": {
-# CHECK-NEXT:         "TimelineInfo": [
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "CycleDispatched": 0,
-# CHECK-NEXT:             "CycleExecuted": 4,
-# CHECK-NEXT:             "CycleIssued": 1,
-# CHECK-NEXT:             "CycleReady": 0,
-# CHECK-NEXT:             "CycleRetired": 5
-# CHECK-NEXT:           },
-# CHECK-NEXT:           {
-# CHECK-NEXT:             "CycleDispatched": 0,
-# CHECK-NEXT:             "CycleExecuted": 9,
-# CHECK-NEXT:             "CycleIssued": 1,
-# CHECK-NEXT:             "CycleReady": 1,
-# CHECK-NEXT:             "CycleRetired": 10
-# CHECK-NEXT:           }
-# CHECK-NEXT:         ]
 # CHECK-NEXT:       }
 # CHECK-NEXT:     }
 # CHECK-NEXT:   ],

>From 74e3aef65d6f4c5a117697a440eaa1d9a42a53ed Mon Sep 17 00:00:00 2001
From: Phil Camp <phil.camp at sony.com>
Date: Fri, 17 May 2024 14:22:28 +0100
Subject: [PATCH 3/4] Changes to make Float-to-int scalar transform codegen
 deterministic

---
 .../llvm/Transforms/Scalar/Float2Int.h        | 24 +++++++++++++++++-
 llvm/lib/Transforms/Scalar/Float2Int.cpp      | 25 ++++++++++++++-----
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/Float2Int.h b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
index 337e229efcf37..6922917624e78 100644
--- a/llvm/include/llvm/Transforms/Scalar/Float2Int.h
+++ b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
@@ -28,6 +28,25 @@ class LLVMContext;
 class Type;
 class Value;
 
+class OrderedInstruction {
+  Instruction *Ins;
+  unsigned int Order;
+
+public:
+  OrderedInstruction(Instruction *Inst, unsigned int Ord) : Ins(Inst), Order(Ord) {}
+
+  Instruction *getInstruction() { return Ins; }
+  unsigned int getOrder() { return Order; }
+};
+
+template <class T> struct OrderedInstructionLess {
+  bool operator()(const T &lhs, const T &rhs) const {
+    OrderedInstruction lhsOrder = lhs;
+    OrderedInstruction rhsOrder = rhs;
+    return rhsOrder.getOrder() < lhsOrder.getOrder();
+  }
+};
+
 class Float2IntPass : public PassInfoMixin<Float2IntPass> {
 public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -36,6 +55,7 @@ class Float2IntPass : public PassInfoMixin<Float2IntPass> {
   bool runImpl(Function &F, const DominatorTree &DT);
 
 private:
+  unsigned int insOrder(Instruction *I);
   void findRoots(Function &F, const DominatorTree &DT);
   void seen(Instruction *I, ConstantRange R);
   ConstantRange badRange();
@@ -50,7 +70,9 @@ class Float2IntPass : public PassInfoMixin<Float2IntPass> {
 
   MapVector<Instruction *, ConstantRange> SeenInsts;
   SmallSetVector<Instruction *, 8> Roots;
-  EquivalenceClasses<Instruction *> ECs;
+  EquivalenceClasses<OrderedInstruction,
+                     OrderedInstructionLess<OrderedInstruction>> ECs;
+  MapVector<Instruction *, unsigned int> InstructionOrders;
   MapVector<Instruction *, Value *> ConvertedInsts;
   LLVMContext *Ctx;
 };
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index da4d39b4e3ed4..cc69b78e32dc1 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -84,6 +84,16 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
   }
 }
 
+// Instruction order - return deterministic order suitable as set
+// order for EquivalenceClasses.
+unsigned int Float2IntPass::insOrder(Instruction* I) {
+  static unsigned int order = 0;
+  if (InstructionOrders.find(I) != InstructionOrders.end())
+    return InstructionOrders[I];
+  InstructionOrders[I] = order++;
+  return order - 1;
+}
+
 // Find the roots - instructions that convert from the FP domain to
 // integer domain.
 void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) {
@@ -191,7 +201,7 @@ void Float2IntPass::walkBackwards() {
     for (Value *O : I->operands()) {
       if (Instruction *OI = dyn_cast<Instruction>(O)) {
         // Unify def-use chains if they interfere.
-        ECs.unionSets(I, OI);
+        ECs.unionSets(OrderedInstruction(I, insOrder(I)), OrderedInstruction(OI, insOrder(OI)));
         if (SeenInsts.find(I)->second != badRange())
           Worklist.push_back(OI);
       } else if (!isa<ConstantFP>(O)) {
@@ -323,7 +333,8 @@ bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
     // For every member of the partition, union all the ranges together.
     for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
          MI != ME; ++MI) {
-      Instruction *I = *MI;
+      OrderedInstruction OMI = *MI;
+      Instruction *I = OMI.getInstruction();
       auto SeenI = SeenInsts.find(I);
       if (SeenI == SeenInsts.end())
         continue;
@@ -392,9 +403,10 @@ bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
       }
     }
 
-    for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
-         MI != ME; ++MI)
-      convert(*MI, Ty);
+    for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); MI != ME; ++MI) {
+      OrderedInstruction OMI = *MI;
+      convert(OMI.getInstruction(), Ty);
+    }
     MadeChange = true;
   }
 
@@ -485,8 +497,9 @@ void Float2IntPass::cleanup() {
 bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
   LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
   // Clear out all state.
-  ECs = EquivalenceClasses<Instruction*>();
+  ECs = EquivalenceClasses<OrderedInstruction, OrderedInstructionLess<OrderedInstruction> >();
   SeenInsts.clear();
+  InstructionOrders.clear();
   ConvertedInsts.clear();
   Roots.clear();
 

>From 83fe3d0a3c65e2c2ed6b4f7dc212d480ee99b9ca Mon Sep 17 00:00:00 2001
From: Phil Camp <phil.camp at sony.com>
Date: Fri, 17 May 2024 14:27:28 +0100
Subject: [PATCH 4/4] Revert "Changes to make Float-to-int scalar transform
 codegen deterministic"

This reverts commit 74e3aef65d6f4c5a117697a440eaa1d9a42a53ed.
---
 .../llvm/Transforms/Scalar/Float2Int.h        | 24 +-----------------
 llvm/lib/Transforms/Scalar/Float2Int.cpp      | 25 +++++--------------
 2 files changed, 7 insertions(+), 42 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/Float2Int.h b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
index 6922917624e78..337e229efcf37 100644
--- a/llvm/include/llvm/Transforms/Scalar/Float2Int.h
+++ b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
@@ -28,25 +28,6 @@ class LLVMContext;
 class Type;
 class Value;
 
-class OrderedInstruction {
-  Instruction *Ins;
-  unsigned int Order;
-
-public:
-  OrderedInstruction(Instruction *Inst, unsigned int Ord) : Ins(Inst), Order(Ord) {}
-
-  Instruction *getInstruction() { return Ins; }
-  unsigned int getOrder() { return Order; }
-};
-
-template <class T> struct OrderedInstructionLess {
-  bool operator()(const T &lhs, const T &rhs) const {
-    OrderedInstruction lhsOrder = lhs;
-    OrderedInstruction rhsOrder = rhs;
-    return rhsOrder.getOrder() < lhsOrder.getOrder();
-  }
-};
-
 class Float2IntPass : public PassInfoMixin<Float2IntPass> {
 public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -55,7 +36,6 @@ class Float2IntPass : public PassInfoMixin<Float2IntPass> {
   bool runImpl(Function &F, const DominatorTree &DT);
 
 private:
-  unsigned int insOrder(Instruction *I);
   void findRoots(Function &F, const DominatorTree &DT);
   void seen(Instruction *I, ConstantRange R);
   ConstantRange badRange();
@@ -70,9 +50,7 @@ class Float2IntPass : public PassInfoMixin<Float2IntPass> {
 
   MapVector<Instruction *, ConstantRange> SeenInsts;
   SmallSetVector<Instruction *, 8> Roots;
-  EquivalenceClasses<OrderedInstruction,
-                     OrderedInstructionLess<OrderedInstruction>> ECs;
-  MapVector<Instruction *, unsigned int> InstructionOrders;
+  EquivalenceClasses<Instruction *> ECs;
   MapVector<Instruction *, Value *> ConvertedInsts;
   LLVMContext *Ctx;
 };
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index cc69b78e32dc1..da4d39b4e3ed4 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -84,16 +84,6 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
   }
 }
 
-// Instruction order - return deterministic order suitable as set
-// order for EquivalenceClasses.
-unsigned int Float2IntPass::insOrder(Instruction* I) {
-  static unsigned int order = 0;
-  if (InstructionOrders.find(I) != InstructionOrders.end())
-    return InstructionOrders[I];
-  InstructionOrders[I] = order++;
-  return order - 1;
-}
-
 // Find the roots - instructions that convert from the FP domain to
 // integer domain.
 void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) {
@@ -201,7 +191,7 @@ void Float2IntPass::walkBackwards() {
     for (Value *O : I->operands()) {
       if (Instruction *OI = dyn_cast<Instruction>(O)) {
         // Unify def-use chains if they interfere.
-        ECs.unionSets(OrderedInstruction(I, insOrder(I)), OrderedInstruction(OI, insOrder(OI)));
+        ECs.unionSets(I, OI);
         if (SeenInsts.find(I)->second != badRange())
           Worklist.push_back(OI);
       } else if (!isa<ConstantFP>(O)) {
@@ -333,8 +323,7 @@ bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
     // For every member of the partition, union all the ranges together.
     for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
          MI != ME; ++MI) {
-      OrderedInstruction OMI = *MI;
-      Instruction *I = OMI.getInstruction();
+      Instruction *I = *MI;
       auto SeenI = SeenInsts.find(I);
       if (SeenI == SeenInsts.end())
         continue;
@@ -403,10 +392,9 @@ bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
       }
     }
 
-    for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); MI != ME; ++MI) {
-      OrderedInstruction OMI = *MI;
-      convert(OMI.getInstruction(), Ty);
-    }
+    for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+         MI != ME; ++MI)
+      convert(*MI, Ty);
     MadeChange = true;
   }
 
@@ -497,9 +485,8 @@ void Float2IntPass::cleanup() {
 bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
   LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
   // Clear out all state.
-  ECs = EquivalenceClasses<OrderedInstruction, OrderedInstructionLess<OrderedInstruction> >();
+  ECs = EquivalenceClasses<Instruction*>();
   SeenInsts.clear();
-  InstructionOrders.clear();
   ConvertedInsts.clear();
   Roots.clear();
 



More information about the llvm-commits mailing list