[llvm] r335815 - [llvm-exegesis] Add partial X87 support.

Clement Courbet via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 28 00:41:16 PDT 2018


Author: courbet
Date: Thu Jun 28 00:41:16 2018
New Revision: 335815

URL: http://llvm.org/viewvc/llvm-project?rev=335815&view=rev
Log:
[llvm-exegesis] Add partial X87 support.

Summary:
This enables the X86-specific X86FloatingPointStackifierPass and allows
llvm-exegesis to generate and measure X87 latency/uops for some FP ops.

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Differential Revision: https://reviews.llvm.org/D48592

Modified:
    llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
    llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h
    llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp
    llvm/trunk/tools/llvm-exegesis/lib/Latency.h
    llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp

Modified: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp Thu Jun 28 00:41:16 2018
@@ -196,4 +196,25 @@ BenchmarkRunner::writeObjectFile(const B
   return ResultPath.str();
 }
 
+llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
+    const Instruction &Instr) const {
+  const AliasingConfigurations SelfAliasing(Instr, Instr);
+  if (SelfAliasing.empty()) {
+    return llvm::make_error<BenchmarkFailure>("empty self aliasing");
+  }
+  SnippetPrototype Prototype;
+  InstructionInstance II(Instr);
+  if (SelfAliasing.hasImplicitAliasing()) {
+    Prototype.Explanation = "implicit Self cycles, picking random values.";
+  } else {
+    Prototype.Explanation =
+        "explicit self cycles, selecting one aliasing Conf.";
+    // This is a self aliasing instruction so defs and uses are from the same
+    // instance, hence twice II in the following call.
+    setRandomAliasing(SelfAliasing, II, II);
+  }
+  Prototype.Snippet.push_back(std::move(II));
+  return std::move(Prototype);
+}
+
 } // namespace exegesis

Modified: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h Thu Jun 28 00:41:16 2018
@@ -69,6 +69,9 @@ protected:
   const LLVMState &State;
   const RegisterAliasingTrackerCache RATC;
 
+  llvm::Expected<SnippetPrototype> generateSelfAliasingPrototype(
+      const Instruction &Instr) const;
+
 private:
   // API to be implemented by subclasses.
   virtual llvm::Expected<SnippetPrototype>

Modified: llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp Thu Jun 28 00:41:16 2018
@@ -43,28 +43,8 @@ llvm::Error LatencyBenchmarkRunner::isIn
 }
 
 llvm::Expected<SnippetPrototype>
-LatencyBenchmarkRunner::generateSelfAliasingPrototype(
-    const Instruction &Instr,
-    const AliasingConfigurations &SelfAliasing) const {
-  SnippetPrototype Prototype;
-  InstructionInstance II(Instr);
-  if (SelfAliasing.hasImplicitAliasing()) {
-    Prototype.Explanation = "implicit Self cycles, picking random values.";
-  } else {
-    Prototype.Explanation =
-        "explicit self cycles, selecting one aliasing Conf.";
-    // This is a self aliasing instruction so defs and uses are from the same
-    // instance, hence twice II in the following call.
-    setRandomAliasing(SelfAliasing, II, II);
-  }
-  Prototype.Snippet.push_back(std::move(II));
-  return std::move(Prototype);
-}
-
-llvm::Expected<SnippetPrototype>
 LatencyBenchmarkRunner::generateTwoInstructionPrototype(
-    const Instruction &Instr,
-    const AliasingConfigurations &SelfAliasing) const {
+    const Instruction &Instr) const {
   std::vector<unsigned> Opcodes;
   Opcodes.resize(State.getInstrInfo().getNumOpcodes());
   std::iota(Opcodes.begin(), Opcodes.end(), 0U);
@@ -89,8 +69,9 @@ LatencyBenchmarkRunner::generateTwoInstr
     if (!Back.hasImplicitAliasing())
       setRandomAliasing(Back, OtherII, ThisII);
     SnippetPrototype Prototype;
-    Prototype.Explanation = llvm::formatv("creating cycle through {0}.",
-                                          State.getInstrInfo().getName(OtherOpcode));
+    Prototype.Explanation =
+        llvm::formatv("creating cycle through {0}.",
+                      State.getInstrInfo().getName(OtherOpcode));
     Prototype.Snippet.push_back(std::move(ThisII));
     Prototype.Snippet.push_back(std::move(OtherII));
     return std::move(Prototype);
@@ -105,13 +86,12 @@ LatencyBenchmarkRunner::generatePrototyp
   if (auto E = isInfeasible(InstrDesc))
     return std::move(E);
   const Instruction Instr(InstrDesc, RATC);
-  const AliasingConfigurations SelfAliasing(Instr, Instr);
-  if (SelfAliasing.empty()) {
-    // No self aliasing, trying to create a dependency through another opcode.
-    return generateTwoInstructionPrototype(Instr, SelfAliasing);
-  } else {
-    return generateSelfAliasingPrototype(Instr, SelfAliasing);
-  }
+  if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr))
+    return SelfAliasingPrototype;
+  else
+    llvm::consumeError(SelfAliasingPrototype.takeError());
+  // No self aliasing, trying to create a dependency through another opcode.
+  return generateTwoInstructionPrototype(Instr);
 }
 
 std::vector<BenchmarkMeasure>

Modified: llvm/trunk/tools/llvm-exegesis/lib/Latency.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/Latency.h?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/Latency.h (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/Latency.h Thu Jun 28 00:41:16 2018
@@ -32,13 +32,8 @@ public:
 private:
   llvm::Error isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const;
 
-  llvm::Expected<SnippetPrototype> generateSelfAliasingPrototype(
-      const Instruction &Instr,
-      const AliasingConfigurations &SelfAliasing) const;
-
   llvm::Expected<SnippetPrototype> generateTwoInstructionPrototype(
-      const Instruction &Instr,
-      const AliasingConfigurations &SelfAliasing) const;
+      const Instruction &Instr) const;
 
   std::vector<BenchmarkMeasure>
   runMeasurements(const ExecutableFunction &EF,

Modified: llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp?rev=335815&r1=335814&r2=335815&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp Thu Jun 28 00:41:16 2018
@@ -10,6 +10,7 @@
 
 #include "../Latency.h"
 #include "../Uops.h"
+#include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86.h"
 #include "X86RegisterInfo.h"
@@ -17,43 +18,107 @@
 
 namespace exegesis {
 
-// Test whether we can generate a snippet for this instruction.
-static llvm::Error shouldRun(const LLVMState &State, const unsigned Opcode) {
-  const auto &InstrInfo = State.getInstrInfo();
-  const auto OpcodeName = InstrInfo.getName(Opcode);
-  if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
-      OpcodeName.startswith("ADJCALLSTACK")) {
-    return llvm::make_error<BenchmarkFailure>(
-        "Unsupported opcode: Push/Pop/AdjCallStack");
-  }
-  return llvm::ErrorSuccess();
-}
-
 namespace {
 
-class X86LatencyBenchmarkRunner : public LatencyBenchmarkRunner {
-private:
-  using LatencyBenchmarkRunner::LatencyBenchmarkRunner;
+// Common code for X86 Uops and Latency runners.
+template <typename Impl> class X86BenchmarkRunner : public Impl {
+  using Impl::Impl;
 
   llvm::Expected<SnippetPrototype>
   generatePrototype(unsigned Opcode) const override {
-    if (llvm::Error E = shouldRun(State, Opcode)) {
-      return std::move(E);
+    // Test whether we can generate a snippet for this instruction.
+    const auto &InstrInfo = this->State.getInstrInfo();
+    const auto OpcodeName = InstrInfo.getName(Opcode);
+    if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
+        OpcodeName.startswith("ADJCALLSTACK")) {
+      return llvm::make_error<BenchmarkFailure>(
+          "Unsupported opcode: Push/Pop/AdjCallStack");
+    }
+
+    // Handle X87.
+    const auto &InstrDesc = InstrInfo.get(Opcode);
+    const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
+    const Instruction Instr(InstrDesc, this->RATC);
+    switch (FPInstClass) {
+    case llvm::X86II::NotFP:
+      break;
+    case llvm::X86II::ZeroArgFP:
+      return Impl::handleZeroArgFP(Instr);
+    case llvm::X86II::OneArgFP:
+      return Impl::handleOneArgFP(Instr); // fstp ST(0)
+    case llvm::X86II::OneArgFPRW:
+    case llvm::X86II::TwoArgFP: {
+      // These are instructions like
+      //   - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
+      //   - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
+      // They are intrinsically serial and do not modify the state of the stack.
+      // We generate the same code for latency and uops.
+      return this->generateSelfAliasingPrototype(Instr);
+    }
+    case llvm::X86II::CompareFP:
+      return Impl::handleCompareFP(Instr);
+    case llvm::X86II::CondMovFP:
+      return Impl::handleCondMovFP(Instr);
+    case llvm::X86II::SpecialFP:
+      return Impl::handleSpecialFP(Instr);
+    default:
+      llvm_unreachable("Unknown FP Type!");
     }
-    return LatencyBenchmarkRunner::generatePrototype(Opcode);
+
+    // Fallback to generic implementation.
+    return Impl::Base::generatePrototype(Opcode);
   }
 };
 
-class X86UopsBenchmarkRunner : public UopsBenchmarkRunner {
-private:
-  using UopsBenchmarkRunner::UopsBenchmarkRunner;
+class X86LatencyImpl : public LatencyBenchmarkRunner {
+protected:
+  using Base = LatencyBenchmarkRunner;
+  using Base::Base;
+  llvm::Expected<SnippetPrototype>
+  handleZeroArgFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
+  }
+  llvm::Expected<SnippetPrototype>
+  handleOneArgFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
+  }
+  llvm::Expected<SnippetPrototype>
+  handleCompareFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
+  }
+  llvm::Expected<SnippetPrototype>
+  handleCondMovFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
+  }
+  llvm::Expected<SnippetPrototype>
+  handleSpecialFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
+  }
+};
 
+class X86UopsImpl : public UopsBenchmarkRunner {
+protected:
+  using Base = UopsBenchmarkRunner;
+  using Base::Base;
   llvm::Expected<SnippetPrototype>
-  generatePrototype(unsigned Opcode) const override {
-    if (llvm::Error E = shouldRun(State, Opcode)) {
-      return std::move(E);
-    }
-    return UopsBenchmarkRunner::generatePrototype(Opcode);
+  handleZeroArgFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
+  }
+  llvm::Expected<SnippetPrototype>
+  handleOneArgFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
+  }
+  llvm::Expected<SnippetPrototype>
+  handleCompareFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
+  }
+  llvm::Expected<SnippetPrototype>
+  handleCondMovFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
+  }
+  llvm::Expected<SnippetPrototype>
+  handleSpecialFP(const Instruction &Instr) const {
+    return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
   }
 };
 
@@ -62,15 +127,11 @@ class ExegesisX86Target : public Exegesi
     // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
     // FIXME: Enable when the exegesis assembler no longer does
     // Properties.reset(TracksLiveness);
-    // PM.add(llvm::createX86FloatingPointStackifierPass());
+    PM.add(llvm::createX86FloatingPointStackifierPass());
   }
 
   std::vector<llvm::MCInst>
   setRegToConstant(unsigned Reg) const override {
-    // FIXME: Handle FP stack:
-    // llvm::X86::RFP32RegClass
-    // llvm::X86::RFP64RegClass
-    // llvm::X86::RFP80RegClass
     if (llvm::X86::GR8RegClass.contains(Reg)) {
       return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
     }
@@ -92,17 +153,23 @@ class ExegesisX86Target : public Exegesi
     if (llvm::X86::VR512RegClass.contains(Reg)) {
       return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU64Zrm);
     }
+    if (llvm::X86::RFP32RegClass.contains(Reg) ||
+        llvm::X86::RFP64RegClass.contains(Reg) ||
+        llvm::X86::RFP80RegClass.contains(Reg)) {
+      return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
+    }
     return {};
   }
 
   std::unique_ptr<BenchmarkRunner>
   createLatencyBenchmarkRunner(const LLVMState &State) const override {
-    return llvm::make_unique<X86LatencyBenchmarkRunner>(State);
+    return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(
+        State);
   }
 
   std::unique_ptr<BenchmarkRunner>
   createUopsBenchmarkRunner(const LLVMState &State) const override {
-    return llvm::make_unique<X86UopsBenchmarkRunner>(State);
+    return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
   }
 
   bool matchesArch(llvm::Triple::ArchType Arch) const override {




More information about the llvm-commits mailing list