[llvm] [llvm-mca] Add bottle-neck analysis to JSON output. (PR #90056)
Phil Camp via llvm-commits
llvm-commits at lists.llvm.org
Fri May 17 06:28:24 PDT 2024
https://github.com/FlameTop updated https://github.com/llvm/llvm-project/pull/90056
>From 010509461d609899836ac4d554bcd42d37351e79 Mon Sep 17 00:00:00 2001
From: Phil Camp <phil.camp at sony.com>
Date: Thu, 25 Apr 2024 14:43:18 +0100
Subject: [PATCH 1/4] Add bottle-neck analysis to JSON output.
---
.../llvm-mca/JSON/X86/views-bottleneck.s | 166 ++++++++++++++++++
.../llvm-mca/Views/BottleneckAnalysis.cpp | 46 +++++
.../tools/llvm-mca/Views/BottleneckAnalysis.h | 3 +-
3 files changed, 214 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
diff --git a/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s b/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
new file mode 100644
index 0000000000000..2267e623b5e99
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
@@ -0,0 +1,166 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --timeline --all-stats --all-views < %s | FileCheck %s
+vaddps %xmm0, %xmm0, %xmm1
+vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+
+# CHECK: {
+# CHECK-NEXT: "CodeRegions": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "BottleneckAnalysis": {
+# CHECK-NEXT: "DataDependencyCycles": 0,
+# CHECK-NEXT: "DependencyEdge": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "FromID": 1,
+# CHECK-NEXT: "ResourceOrRegID": 128,
+# CHECK-NEXT: "ToID": 3,
+# CHECK-NEXT: "Type": 3
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "FromID": 3,
+# CHECK-NEXT: "ResourceOrRegID": 128,
+# CHECK-NEXT: "ToID": 5,
+# CHECK-NEXT: "Type": 3
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "MemoryDependencyCycles": 0,
+# CHECK-NEXT: "PressureIncreaseCycles": 56,
+# CHECK-NEXT: "RegisterDependencyCycles": 0,
+# CHECK-NEXT: "ResourcePressure": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "HWPort5": 56
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "ResourcePressureCycles": 56,
+# CHECK-NEXT: "TotalCycles": 209
+# CHECK-NEXT: },
+# CHECK-NEXT: "DispatchStatistics": {
+# CHECK-NEXT: "GROUP": 0,
+# CHECK-NEXT: "LQ": 0,
+# CHECK-NEXT: "RAT": 0,
+# CHECK-NEXT: "RCU": 13,
+# CHECK-NEXT: "SCHEDQ": 0,
+# CHECK-NEXT: "SQ": 0,
+# CHECK-NEXT: "USH": 0
+# CHECK-NEXT: },
+# CHECK-NEXT: "InstructionInfoView": {
+# CHECK-NEXT: "InstructionList": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "Instruction": 0,
+# CHECK-NEXT: "Latency": 3,
+# CHECK-NEXT: "NumMicroOpcodes": 1,
+# CHECK-NEXT: "RThroughput": 1,
+# CHECK-NEXT: "hasUnmodeledSideEffects": false,
+# CHECK-NEXT: "mayLoad": false,
+# CHECK-NEXT: "mayStore": false
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "Instruction": 1,
+# CHECK-NEXT: "Latency": 8,
+# CHECK-NEXT: "NumMicroOpcodes": 3,
+# CHECK-NEXT: "RThroughput": 2,
+# CHECK-NEXT: "hasUnmodeledSideEffects": false,
+# CHECK-NEXT: "mayLoad": true,
+# CHECK-NEXT: "mayStore": false
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+# CHECK-NEXT: },
+# CHECK-NEXT: "Instructions": [
+# CHECK-NEXT: "vaddps\t%xmm0, %xmm0, %xmm1",
+# CHECK-NEXT: "vblendvps\t%xmm1, (%rdi), %xmm2, %xmm3"
+# CHECK-NEXT: ],
+# CHECK-NEXT: "Name": "",
+# CHECK-NEXT: "ResourcePressureView": {
+# CHECK-NEXT: "ResourcePressureInfo": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "InstructionIndex": 0,
+# CHECK-NEXT: "ResourceIndex": 3,
+# CHECK-NEXT: "ResourceUsage": 1
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "InstructionIndex": 1,
+# CHECK-NEXT: "ResourceIndex": 4,
+# CHECK-NEXT: "ResourceUsage": 0.5
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "InstructionIndex": 1,
+# CHECK-NEXT: "ResourceIndex": 5,
+# CHECK-NEXT: "ResourceUsage": 0.5
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "InstructionIndex": 1,
+# CHECK-NEXT: "ResourceIndex": 7,
+# CHECK-NEXT: "ResourceUsage": 2
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "InstructionIndex": 2,
+# CHECK-NEXT: "ResourceIndex": 3,
+# CHECK-NEXT: "ResourceUsage": 1
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "InstructionIndex": 2,
+# CHECK-NEXT: "ResourceIndex": 4,
+# CHECK-NEXT: "ResourceUsage": 0.5
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "InstructionIndex": 2,
+# CHECK-NEXT: "ResourceIndex": 5,
+# CHECK-NEXT: "ResourceUsage": 0.5
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "InstructionIndex": 2,
+# CHECK-NEXT: "ResourceIndex": 7,
+# CHECK-NEXT: "ResourceUsage": 2
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+# CHECK-NEXT: },
+# CHECK-NEXT: "SummaryView": {
+# CHECK-NEXT: "BlockRThroughput": 2,
+# CHECK-NEXT: "DispatchWidth": 4,
+# CHECK-NEXT: "IPC": 0.9569377990430622,
+# CHECK-NEXT: "Instructions": 200,
+# CHECK-NEXT: "Iterations": 100,
+# CHECK-NEXT: "TotalCycles": 209,
+# CHECK-NEXT: "TotaluOps": 400,
+# CHECK-NEXT: "uOpsPerCycle": 1.9138755980861244
+# CHECK-NEXT: },
+# CHECK-NEXT: "TimelineView": {
+# CHECK-NEXT: "TimelineInfo": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "CycleDispatched": 0,
+# CHECK-NEXT: "CycleExecuted": 4,
+# CHECK-NEXT: "CycleIssued": 1,
+# CHECK-NEXT: "CycleReady": 0,
+# CHECK-NEXT: "CycleRetired": 5
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "CycleDispatched": 0,
+# CHECK-NEXT: "CycleExecuted": 9,
+# CHECK-NEXT: "CycleIssued": 1,
+# CHECK-NEXT: "CycleReady": 1,
+# CHECK-NEXT: "CycleRetired": 10
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+# CHECK-NEXT: }
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "SimulationParameters": {
+# CHECK-NEXT: "-march": "x86_64",
+# CHECK-NEXT: "-mcpu": "haswell",
+# CHECK-NEXT: "-mtriple": "x86_64-unknown-unknown"
+# CHECK-NEXT: },
+# CHECK-NEXT: "TargetInfo": {
+# CHECK-NEXT: "CPUName": "haswell",
+# CHECK-NEXT: "Resources": [
+# CHECK-NEXT: "HWDivider",
+# CHECK-NEXT: "HWFPDivider",
+# CHECK-NEXT: "HWPort0",
+# CHECK-NEXT: "HWPort1",
+# CHECK-NEXT: "HWPort2",
+# CHECK-NEXT: "HWPort3",
+# CHECK-NEXT: "HWPort4",
+# CHECK-NEXT: "HWPort5",
+# CHECK-NEXT: "HWPort6",
+# CHECK-NEXT: "HWPort7"
+# CHECK-NEXT: ]
+# CHECK-NEXT: }
+# CHECK-NEXT: }
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 409a7010b80c8..ece5b6709a2e4 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -641,5 +641,51 @@ void BottleneckAnalysis::printView(raw_ostream &OS) const {
printCriticalSequence(OS);
}
+/// Serialize the bottleneck report (cycle counters, critical sequence edges,
+/// and per-resource pressure cycles) as a JSON object.
+json::Value BottleneckAnalysis::toJSON() const {
+  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
+    json::Object JO({{"PressureIncreaseCycles", 0}});
+    return JO;
+  }
+
+  // One JSON object per edge of the critical sequence (none if it is empty).
+  json::Array CriticalSequence;
+  SmallVector<const DependencyEdge *, 16> Seq;
+  DG.getCriticalSequence(Seq);
+  for (const DependencyEdge *DE : Seq) {
+    json::Object DEJO({{"FromID", DE->FromIID},
+                       {"ToID", DE->ToIID},
+                       {"Type", static_cast<unsigned>(DE->Dep.Type)},
+                       {"ResourceOrRegID", DE->Dep.ResourceOrRegID}});
+    CriticalSequence.push_back(std::move(DEJO));
+  }
+
+  // PressureIncreaseCycles is non-zero here, so no extra guard is needed.
+  json::Array ResourcePressure;
+  ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+  const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+  for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
+    unsigned ReleaseAtCycles = Distribution[I];
+    if (!ReleaseAtCycles)
+      continue; // Only resources with a non-zero count are reported.
+    const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
+    json::Object RPJO({{PRDesc.Name, ReleaseAtCycles}});
+    ResourcePressure.push_back(std::move(RPJO));
+  }
+
+  json::Object JO({{"PressureIncreaseCycles", BPI.PressureIncreaseCycles},
+                   {"ResourcePressureCycles", BPI.ResourcePressureCycles},
+                   {"DataDependencyCycles", BPI.DataDependencyCycles},
+                   {"RegisterDependencyCycles", BPI.RegisterDependencyCycles},
+                   {"MemoryDependencyCycles", BPI.MemoryDependencyCycles},
+                   {"TotalCycles", TotalCycles},
+                   {"DependencyEdge", std::move(CriticalSequence)},
+                   {"ResourcePressure", std::move(ResourcePressure)}});
+
+  return JO;
+}
+
} // namespace mca.
} // namespace llvm
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
index e709b25c3f768..529090cf543fc 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -335,7 +335,8 @@ class BottleneckAnalysis : public InstructionView {
void printView(raw_ostream &OS) const override;
StringRef getNameAsString() const override { return "BottleneckAnalysis"; }
- bool isSerializable() const override { return false; }
+ bool isSerializable() const override { return true; }
+ json::Value toJSON() const override;
#ifndef NDEBUG
void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }
>From 731bc9a032a98e2c65797bba1732fbbda16f9c1d Mon Sep 17 00:00:00 2001
From: Phil Camp <phil.camp at sony.com>
Date: Fri, 26 Apr 2024 13:24:28 +0100
Subject: [PATCH 2/4] Removed non-bottleneck views from test
---
.../llvm-mca/JSON/X86/views-bottleneck.s | 95 +------------------
1 file changed, 1 insertion(+), 94 deletions(-)
diff --git a/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s b/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
index 2267e623b5e99..c8ad60ee54209 100644
--- a/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
+++ b/llvm/test/tools/llvm-mca/JSON/X86/views-bottleneck.s
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --timeline --all-stats --all-views < %s | FileCheck %s
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --bottleneck-analysis --resource-pressure=false --instruction-info=false < %s | FileCheck %s
vaddps %xmm0, %xmm0, %xmm1
vblendvps %xmm1, (%rdi), %xmm2, %xmm3
@@ -33,86 +33,11 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# CHECK-NEXT: "ResourcePressureCycles": 56,
# CHECK-NEXT: "TotalCycles": 209
# CHECK-NEXT: },
-# CHECK-NEXT: "DispatchStatistics": {
-# CHECK-NEXT: "GROUP": 0,
-# CHECK-NEXT: "LQ": 0,
-# CHECK-NEXT: "RAT": 0,
-# CHECK-NEXT: "RCU": 13,
-# CHECK-NEXT: "SCHEDQ": 0,
-# CHECK-NEXT: "SQ": 0,
-# CHECK-NEXT: "USH": 0
-# CHECK-NEXT: },
-# CHECK-NEXT: "InstructionInfoView": {
-# CHECK-NEXT: "InstructionList": [
-# CHECK-NEXT: {
-# CHECK-NEXT: "Instruction": 0,
-# CHECK-NEXT: "Latency": 3,
-# CHECK-NEXT: "NumMicroOpcodes": 1,
-# CHECK-NEXT: "RThroughput": 1,
-# CHECK-NEXT: "hasUnmodeledSideEffects": false,
-# CHECK-NEXT: "mayLoad": false,
-# CHECK-NEXT: "mayStore": false
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "Instruction": 1,
-# CHECK-NEXT: "Latency": 8,
-# CHECK-NEXT: "NumMicroOpcodes": 3,
-# CHECK-NEXT: "RThroughput": 2,
-# CHECK-NEXT: "hasUnmodeledSideEffects": false,
-# CHECK-NEXT: "mayLoad": true,
-# CHECK-NEXT: "mayStore": false
-# CHECK-NEXT: }
-# CHECK-NEXT: ]
-# CHECK-NEXT: },
# CHECK-NEXT: "Instructions": [
# CHECK-NEXT: "vaddps\t%xmm0, %xmm0, %xmm1",
# CHECK-NEXT: "vblendvps\t%xmm1, (%rdi), %xmm2, %xmm3"
# CHECK-NEXT: ],
# CHECK-NEXT: "Name": "",
-# CHECK-NEXT: "ResourcePressureView": {
-# CHECK-NEXT: "ResourcePressureInfo": [
-# CHECK-NEXT: {
-# CHECK-NEXT: "InstructionIndex": 0,
-# CHECK-NEXT: "ResourceIndex": 3,
-# CHECK-NEXT: "ResourceUsage": 1
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "InstructionIndex": 1,
-# CHECK-NEXT: "ResourceIndex": 4,
-# CHECK-NEXT: "ResourceUsage": 0.5
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "InstructionIndex": 1,
-# CHECK-NEXT: "ResourceIndex": 5,
-# CHECK-NEXT: "ResourceUsage": 0.5
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "InstructionIndex": 1,
-# CHECK-NEXT: "ResourceIndex": 7,
-# CHECK-NEXT: "ResourceUsage": 2
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "InstructionIndex": 2,
-# CHECK-NEXT: "ResourceIndex": 3,
-# CHECK-NEXT: "ResourceUsage": 1
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "InstructionIndex": 2,
-# CHECK-NEXT: "ResourceIndex": 4,
-# CHECK-NEXT: "ResourceUsage": 0.5
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "InstructionIndex": 2,
-# CHECK-NEXT: "ResourceIndex": 5,
-# CHECK-NEXT: "ResourceUsage": 0.5
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "InstructionIndex": 2,
-# CHECK-NEXT: "ResourceIndex": 7,
-# CHECK-NEXT: "ResourceUsage": 2
-# CHECK-NEXT: }
-# CHECK-NEXT: ]
-# CHECK-NEXT: },
# CHECK-NEXT: "SummaryView": {
# CHECK-NEXT: "BlockRThroughput": 2,
# CHECK-NEXT: "DispatchWidth": 4,
@@ -122,24 +47,6 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# CHECK-NEXT: "TotalCycles": 209,
# CHECK-NEXT: "TotaluOps": 400,
# CHECK-NEXT: "uOpsPerCycle": 1.9138755980861244
-# CHECK-NEXT: },
-# CHECK-NEXT: "TimelineView": {
-# CHECK-NEXT: "TimelineInfo": [
-# CHECK-NEXT: {
-# CHECK-NEXT: "CycleDispatched": 0,
-# CHECK-NEXT: "CycleExecuted": 4,
-# CHECK-NEXT: "CycleIssued": 1,
-# CHECK-NEXT: "CycleReady": 0,
-# CHECK-NEXT: "CycleRetired": 5
-# CHECK-NEXT: },
-# CHECK-NEXT: {
-# CHECK-NEXT: "CycleDispatched": 0,
-# CHECK-NEXT: "CycleExecuted": 9,
-# CHECK-NEXT: "CycleIssued": 1,
-# CHECK-NEXT: "CycleReady": 1,
-# CHECK-NEXT: "CycleRetired": 10
-# CHECK-NEXT: }
-# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: }
# CHECK-NEXT: ],
>From 74e3aef65d6f4c5a117697a440eaa1d9a42a53ed Mon Sep 17 00:00:00 2001
From: Phil Camp <phil.camp at sony.com>
Date: Fri, 17 May 2024 14:22:28 +0100
Subject: [PATCH 3/4] Changes to make Float-to-int scalar transform codegen
deterministic
---
.../llvm/Transforms/Scalar/Float2Int.h | 24 +++++++++++++++++-
llvm/lib/Transforms/Scalar/Float2Int.cpp | 25 ++++++++++++++-----
2 files changed, 42 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/Float2Int.h b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
index 337e229efcf37..6922917624e78 100644
--- a/llvm/include/llvm/Transforms/Scalar/Float2Int.h
+++ b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
@@ -28,6 +28,25 @@ class LLVMContext;
class Type;
class Value;
+class OrderedInstruction {
+ Instruction *Ins;
+ unsigned int Order;
+
+public:
+ OrderedInstruction(Instruction *Inst, unsigned int Ord) : Ins(Inst), Order(Ord) {}
+
+ Instruction *getInstruction() { return Ins; }
+ unsigned int getOrder() { return Order; }
+};
+
+template <class T> struct OrderedInstructionLess {
+ bool operator()(const T &lhs, const T &rhs) const {
+ OrderedInstruction lhsOrder = lhs;
+ OrderedInstruction rhsOrder = rhs;
+ return rhsOrder.getOrder() < lhsOrder.getOrder();
+ }
+};
+
class Float2IntPass : public PassInfoMixin<Float2IntPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -36,6 +55,7 @@ class Float2IntPass : public PassInfoMixin<Float2IntPass> {
bool runImpl(Function &F, const DominatorTree &DT);
private:
+ unsigned int insOrder(Instruction *I);
void findRoots(Function &F, const DominatorTree &DT);
void seen(Instruction *I, ConstantRange R);
ConstantRange badRange();
@@ -50,7 +70,9 @@ class Float2IntPass : public PassInfoMixin<Float2IntPass> {
MapVector<Instruction *, ConstantRange> SeenInsts;
SmallSetVector<Instruction *, 8> Roots;
- EquivalenceClasses<Instruction *> ECs;
+ EquivalenceClasses<OrderedInstruction,
+ OrderedInstructionLess<OrderedInstruction>> ECs;
+ MapVector<Instruction *, unsigned int> InstructionOrders;
MapVector<Instruction *, Value *> ConvertedInsts;
LLVMContext *Ctx;
};
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index da4d39b4e3ed4..cc69b78e32dc1 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -84,6 +84,16 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
}
}
+// Instruction order - return deterministic order suitable as set
+// order for EquivalenceClasses.
+unsigned int Float2IntPass::insOrder(Instruction* I) {
+ static unsigned int order = 0;
+ if (InstructionOrders.find(I) != InstructionOrders.end())
+ return InstructionOrders[I];
+ InstructionOrders[I] = order++;
+ return order - 1;
+}
+
// Find the roots - instructions that convert from the FP domain to
// integer domain.
void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) {
@@ -191,7 +201,7 @@ void Float2IntPass::walkBackwards() {
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
// Unify def-use chains if they interfere.
- ECs.unionSets(I, OI);
+ ECs.unionSets(OrderedInstruction(I, insOrder(I)), OrderedInstruction(OI, insOrder(OI)));
if (SeenInsts.find(I)->second != badRange())
Worklist.push_back(OI);
} else if (!isa<ConstantFP>(O)) {
@@ -323,7 +333,8 @@ bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
// For every member of the partition, union all the ranges together.
for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
MI != ME; ++MI) {
- Instruction *I = *MI;
+ OrderedInstruction OMI = *MI;
+ Instruction *I = OMI.getInstruction();
auto SeenI = SeenInsts.find(I);
if (SeenI == SeenInsts.end())
continue;
@@ -392,9 +403,10 @@ bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
}
}
- for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
- MI != ME; ++MI)
- convert(*MI, Ty);
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); MI != ME; ++MI) {
+ OrderedInstruction OMI = *MI;
+ convert(OMI.getInstruction(), Ty);
+ }
MadeChange = true;
}
@@ -485,8 +497,9 @@ void Float2IntPass::cleanup() {
bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
// Clear out all state.
- ECs = EquivalenceClasses<Instruction*>();
+ ECs = EquivalenceClasses<OrderedInstruction, OrderedInstructionLess<OrderedInstruction> >();
SeenInsts.clear();
+ InstructionOrders.clear();
ConvertedInsts.clear();
Roots.clear();
>From 83fe3d0a3c65e2c2ed6b4f7dc212d480ee99b9ca Mon Sep 17 00:00:00 2001
From: Phil Camp <phil.camp at sony.com>
Date: Fri, 17 May 2024 14:27:28 +0100
Subject: [PATCH 4/4] Revert "Changes to make Float-to-int scalar transform
codegen deterministic"
This reverts commit 74e3aef65d6f4c5a117697a440eaa1d9a42a53ed.
---
.../llvm/Transforms/Scalar/Float2Int.h | 24 +-----------------
llvm/lib/Transforms/Scalar/Float2Int.cpp | 25 +++++--------------
2 files changed, 7 insertions(+), 42 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/Float2Int.h b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
index 6922917624e78..337e229efcf37 100644
--- a/llvm/include/llvm/Transforms/Scalar/Float2Int.h
+++ b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
@@ -28,25 +28,6 @@ class LLVMContext;
class Type;
class Value;
-class OrderedInstruction {
- Instruction *Ins;
- unsigned int Order;
-
-public:
- OrderedInstruction(Instruction *Inst, unsigned int Ord) : Ins(Inst), Order(Ord) {}
-
- Instruction *getInstruction() { return Ins; }
- unsigned int getOrder() { return Order; }
-};
-
-template <class T> struct OrderedInstructionLess {
- bool operator()(const T &lhs, const T &rhs) const {
- OrderedInstruction lhsOrder = lhs;
- OrderedInstruction rhsOrder = rhs;
- return rhsOrder.getOrder() < lhsOrder.getOrder();
- }
-};
-
class Float2IntPass : public PassInfoMixin<Float2IntPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -55,7 +36,6 @@ class Float2IntPass : public PassInfoMixin<Float2IntPass> {
bool runImpl(Function &F, const DominatorTree &DT);
private:
- unsigned int insOrder(Instruction *I);
void findRoots(Function &F, const DominatorTree &DT);
void seen(Instruction *I, ConstantRange R);
ConstantRange badRange();
@@ -70,9 +50,7 @@ class Float2IntPass : public PassInfoMixin<Float2IntPass> {
MapVector<Instruction *, ConstantRange> SeenInsts;
SmallSetVector<Instruction *, 8> Roots;
- EquivalenceClasses<OrderedInstruction,
- OrderedInstructionLess<OrderedInstruction>> ECs;
- MapVector<Instruction *, unsigned int> InstructionOrders;
+ EquivalenceClasses<Instruction *> ECs;
MapVector<Instruction *, Value *> ConvertedInsts;
LLVMContext *Ctx;
};
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index cc69b78e32dc1..da4d39b4e3ed4 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -84,16 +84,6 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
}
}
-// Instruction order - return deterministic order suitable as set
-// order for EquivalenceClasses.
-unsigned int Float2IntPass::insOrder(Instruction* I) {
- static unsigned int order = 0;
- if (InstructionOrders.find(I) != InstructionOrders.end())
- return InstructionOrders[I];
- InstructionOrders[I] = order++;
- return order - 1;
-}
-
// Find the roots - instructions that convert from the FP domain to
// integer domain.
void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) {
@@ -201,7 +191,7 @@ void Float2IntPass::walkBackwards() {
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
// Unify def-use chains if they interfere.
- ECs.unionSets(OrderedInstruction(I, insOrder(I)), OrderedInstruction(OI, insOrder(OI)));
+ ECs.unionSets(I, OI);
if (SeenInsts.find(I)->second != badRange())
Worklist.push_back(OI);
} else if (!isa<ConstantFP>(O)) {
@@ -333,8 +323,7 @@ bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
// For every member of the partition, union all the ranges together.
for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
MI != ME; ++MI) {
- OrderedInstruction OMI = *MI;
- Instruction *I = OMI.getInstruction();
+ Instruction *I = *MI;
auto SeenI = SeenInsts.find(I);
if (SeenI == SeenInsts.end())
continue;
@@ -403,10 +392,9 @@ bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
}
}
- for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); MI != ME; ++MI) {
- OrderedInstruction OMI = *MI;
- convert(OMI.getInstruction(), Ty);
- }
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI)
+ convert(*MI, Ty);
MadeChange = true;
}
@@ -497,9 +485,8 @@ void Float2IntPass::cleanup() {
bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
// Clear out all state.
- ECs = EquivalenceClasses<OrderedInstruction, OrderedInstructionLess<OrderedInstruction> >();
+ ECs = EquivalenceClasses<Instruction*>();
SeenInsts.clear();
- InstructionOrders.clear();
ConvertedInsts.clear();
Roots.clear();
More information about the llvm-commits
mailing list