[llvm] r335815 - [llvm-exegesis] Add partial X87 support.
Clement Courbet via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 28 00:41:16 PDT 2018
Author: courbet
Date: Thu Jun 28 00:41:16 2018
New Revision: 335815
URL: http://llvm.org/viewvc/llvm-project?rev=335815&view=rev
Log:
[llvm-exegesis] Add partial X87 support.
Summary:
This enables the X86-specific X86FloatingPointStackifierPass, and allows
llvm-exegesis to generate and measure X87 latency/uops for some FP ops.
Reviewers: gchatelet
Subscribers: tschuett, llvm-commits
Differential Revision: https://reviews.llvm.org/D48592
Modified:
llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h
llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp
llvm/trunk/tools/llvm-exegesis/lib/Latency.h
llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp
Modified: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp Thu Jun 28 00:41:16 2018
@@ -196,4 +196,25 @@ BenchmarkRunner::writeObjectFile(const B
return ResultPath.str();
}
+llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
+ const Instruction &Instr) const {
+ const AliasingConfigurations SelfAliasing(Instr, Instr);
+ if (SelfAliasing.empty()) {
+ return llvm::make_error<BenchmarkFailure>("empty self aliasing");
+ }
+ SnippetPrototype Prototype;
+ InstructionInstance II(Instr);
+ if (SelfAliasing.hasImplicitAliasing()) {
+ Prototype.Explanation = "implicit Self cycles, picking random values.";
+ } else {
+ Prototype.Explanation =
+ "explicit self cycles, selecting one aliasing Conf.";
+ // This is a self aliasing instruction so defs and uses are from the same
+ // instance, hence twice II in the following call.
+ setRandomAliasing(SelfAliasing, II, II);
+ }
+ Prototype.Snippet.push_back(std::move(II));
+ return std::move(Prototype);
+}
+
} // namespace exegesis
Modified: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h Thu Jun 28 00:41:16 2018
@@ -69,6 +69,9 @@ protected:
const LLVMState &State;
const RegisterAliasingTrackerCache RATC;
+ llvm::Expected<SnippetPrototype> generateSelfAliasingPrototype(
+ const Instruction &Instr) const;
+
private:
// API to be implemented by subclasses.
virtual llvm::Expected<SnippetPrototype>
Modified: llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp Thu Jun 28 00:41:16 2018
@@ -43,28 +43,8 @@ llvm::Error LatencyBenchmarkRunner::isIn
}
llvm::Expected<SnippetPrototype>
-LatencyBenchmarkRunner::generateSelfAliasingPrototype(
- const Instruction &Instr,
- const AliasingConfigurations &SelfAliasing) const {
- SnippetPrototype Prototype;
- InstructionInstance II(Instr);
- if (SelfAliasing.hasImplicitAliasing()) {
- Prototype.Explanation = "implicit Self cycles, picking random values.";
- } else {
- Prototype.Explanation =
- "explicit self cycles, selecting one aliasing Conf.";
- // This is a self aliasing instruction so defs and uses are from the same
- // instance, hence twice II in the following call.
- setRandomAliasing(SelfAliasing, II, II);
- }
- Prototype.Snippet.push_back(std::move(II));
- return std::move(Prototype);
-}
-
-llvm::Expected<SnippetPrototype>
LatencyBenchmarkRunner::generateTwoInstructionPrototype(
- const Instruction &Instr,
- const AliasingConfigurations &SelfAliasing) const {
+ const Instruction &Instr) const {
std::vector<unsigned> Opcodes;
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
std::iota(Opcodes.begin(), Opcodes.end(), 0U);
@@ -89,8 +69,9 @@ LatencyBenchmarkRunner::generateTwoInstr
if (!Back.hasImplicitAliasing())
setRandomAliasing(Back, OtherII, ThisII);
SnippetPrototype Prototype;
- Prototype.Explanation = llvm::formatv("creating cycle through {0}.",
- State.getInstrInfo().getName(OtherOpcode));
+ Prototype.Explanation =
+ llvm::formatv("creating cycle through {0}.",
+ State.getInstrInfo().getName(OtherOpcode));
Prototype.Snippet.push_back(std::move(ThisII));
Prototype.Snippet.push_back(std::move(OtherII));
return std::move(Prototype);
@@ -105,13 +86,12 @@ LatencyBenchmarkRunner::generatePrototyp
if (auto E = isInfeasible(InstrDesc))
return std::move(E);
const Instruction Instr(InstrDesc, RATC);
- const AliasingConfigurations SelfAliasing(Instr, Instr);
- if (SelfAliasing.empty()) {
- // No self aliasing, trying to create a dependency through another opcode.
- return generateTwoInstructionPrototype(Instr, SelfAliasing);
- } else {
- return generateSelfAliasingPrototype(Instr, SelfAliasing);
- }
+ if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr))
+ return SelfAliasingPrototype;
+ else
+ llvm::consumeError(SelfAliasingPrototype.takeError());
+ // No self aliasing, trying to create a dependency through another opcode.
+ return generateTwoInstructionPrototype(Instr);
}
std::vector<BenchmarkMeasure>
Modified: llvm/trunk/tools/llvm-exegesis/lib/Latency.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/Latency.h?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/Latency.h (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/Latency.h Thu Jun 28 00:41:16 2018
@@ -32,13 +32,8 @@ public:
private:
llvm::Error isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const;
- llvm::Expected<SnippetPrototype> generateSelfAliasingPrototype(
- const Instruction &Instr,
- const AliasingConfigurations &SelfAliasing) const;
-
llvm::Expected<SnippetPrototype> generateTwoInstructionPrototype(
- const Instruction &Instr,
- const AliasingConfigurations &SelfAliasing) const;
+ const Instruction &Instr) const;
std::vector<BenchmarkMeasure>
runMeasurements(const ExecutableFunction &EF,
Modified: llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp Thu Jun 28 00:41:16 2018
@@ -10,6 +10,7 @@
#include "../Latency.h"
#include "../Uops.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
@@ -17,43 +18,107 @@
namespace exegesis {
-// Test whether we can generate a snippet for this instruction.
-static llvm::Error shouldRun(const LLVMState &State, const unsigned Opcode) {
- const auto &InstrInfo = State.getInstrInfo();
- const auto OpcodeName = InstrInfo.getName(Opcode);
- if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
- OpcodeName.startswith("ADJCALLSTACK")) {
- return llvm::make_error<BenchmarkFailure>(
- "Unsupported opcode: Push/Pop/AdjCallStack");
- }
- return llvm::ErrorSuccess();
-}
-
namespace {
-class X86LatencyBenchmarkRunner : public LatencyBenchmarkRunner {
-private:
- using LatencyBenchmarkRunner::LatencyBenchmarkRunner;
+// Common code for X86 Uops and Latency runners.
+template <typename Impl> class X86BenchmarkRunner : public Impl {
+ using Impl::Impl;
llvm::Expected<SnippetPrototype>
generatePrototype(unsigned Opcode) const override {
- if (llvm::Error E = shouldRun(State, Opcode)) {
- return std::move(E);
+ // Test whether we can generate a snippet for this instruction.
+ const auto &InstrInfo = this->State.getInstrInfo();
+ const auto OpcodeName = InstrInfo.getName(Opcode);
+ if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
+ OpcodeName.startswith("ADJCALLSTACK")) {
+ return llvm::make_error<BenchmarkFailure>(
+ "Unsupported opcode: Push/Pop/AdjCallStack");
+ }
+
+ // Handle X87.
+ const auto &InstrDesc = InstrInfo.get(Opcode);
+ const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
+ const Instruction Instr(InstrDesc, this->RATC);
+ switch (FPInstClass) {
+ case llvm::X86II::NotFP:
+ break;
+ case llvm::X86II::ZeroArgFP:
+ return Impl::handleZeroArgFP(Instr);
+ case llvm::X86II::OneArgFP:
+ return Impl::handleOneArgFP(Instr); // fstp ST(0)
+ case llvm::X86II::OneArgFPRW:
+ case llvm::X86II::TwoArgFP: {
+ // These are instructions like
+ // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
+ // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
+ // They are intrinsically serial and do not modify the state of the stack.
+ // We generate the same code for latency and uops.
+ return this->generateSelfAliasingPrototype(Instr);
+ }
+ case llvm::X86II::CompareFP:
+ return Impl::handleCompareFP(Instr);
+ case llvm::X86II::CondMovFP:
+ return Impl::handleCondMovFP(Instr);
+ case llvm::X86II::SpecialFP:
+ return Impl::handleSpecialFP(Instr);
+ default:
+ llvm_unreachable("Unknown FP Type!");
}
- return LatencyBenchmarkRunner::generatePrototype(Opcode);
+
+ // Fallback to generic implementation.
+ return Impl::Base::generatePrototype(Opcode);
}
};
-class X86UopsBenchmarkRunner : public UopsBenchmarkRunner {
-private:
- using UopsBenchmarkRunner::UopsBenchmarkRunner;
+class X86LatencyImpl : public LatencyBenchmarkRunner {
+protected:
+ using Base = LatencyBenchmarkRunner;
+ using Base::Base;
+ llvm::Expected<SnippetPrototype>
+ handleZeroArgFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
+ }
+ llvm::Expected<SnippetPrototype>
+ handleOneArgFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
+ }
+ llvm::Expected<SnippetPrototype>
+ handleCompareFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
+ }
+ llvm::Expected<SnippetPrototype>
+ handleCondMovFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
+ }
+ llvm::Expected<SnippetPrototype>
+ handleSpecialFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
+ }
+};
+class X86UopsImpl : public UopsBenchmarkRunner {
+protected:
+ using Base = UopsBenchmarkRunner;
+ using Base::Base;
llvm::Expected<SnippetPrototype>
- generatePrototype(unsigned Opcode) const override {
- if (llvm::Error E = shouldRun(State, Opcode)) {
- return std::move(E);
- }
- return UopsBenchmarkRunner::generatePrototype(Opcode);
+ handleZeroArgFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
+ }
+ llvm::Expected<SnippetPrototype>
+ handleOneArgFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
+ }
+ llvm::Expected<SnippetPrototype>
+ handleCompareFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
+ }
+ llvm::Expected<SnippetPrototype>
+ handleCondMovFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
+ }
+ llvm::Expected<SnippetPrototype>
+ handleSpecialFP(const Instruction &Instr) const {
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
}
};
@@ -62,15 +127,11 @@ class ExegesisX86Target : public Exegesi
// Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
// FIXME: Enable when the exegesis assembler no longer does
// Properties.reset(TracksLiveness);
- // PM.add(llvm::createX86FloatingPointStackifierPass());
+ PM.add(llvm::createX86FloatingPointStackifierPass());
}
std::vector<llvm::MCInst>
setRegToConstant(unsigned Reg) const override {
- // FIXME: Handle FP stack:
- // llvm::X86::RFP32RegClass
- // llvm::X86::RFP64RegClass
- // llvm::X86::RFP80RegClass
if (llvm::X86::GR8RegClass.contains(Reg)) {
return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
}
@@ -92,17 +153,23 @@ class ExegesisX86Target : public Exegesi
if (llvm::X86::VR512RegClass.contains(Reg)) {
return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU64Zrm);
}
+ if (llvm::X86::RFP32RegClass.contains(Reg) ||
+ llvm::X86::RFP64RegClass.contains(Reg) ||
+ llvm::X86::RFP80RegClass.contains(Reg)) {
+ return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
+ }
return {};
}
std::unique_ptr<BenchmarkRunner>
createLatencyBenchmarkRunner(const LLVMState &State) const override {
- return llvm::make_unique<X86LatencyBenchmarkRunner>(State);
+ return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(
+ State);
}
std::unique_ptr<BenchmarkRunner>
createUopsBenchmarkRunner(const LLVMState &State) const override {
- return llvm::make_unique<X86UopsBenchmarkRunner>(State);
+ return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
}
bool matchesArch(llvm::Triple::ArchType Arch) const override {
More information about the llvm-commits mailing list