[llvm] [llvm-exegesis] Add loop-register snippet annotation (PR #82873)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 27 01:28:38 PST 2024
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/82873
>From c6cf0a449efbc307595245c1498115d35807b8dd Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sat, 24 Feb 2024 09:45:58 +0000
Subject: [PATCH 1/2] [llvm-exegesis] Add loop-register snippet annotation
This patch adds an LLVM-EXEGESIS-LOOP-REGISTER snippet annotation that
allows a user to specify the register to use for the loop counter in the
loop repetition mode. This allows for executing snippets that conflict
with the default loop register (currently R8 on X86).
---
.../llvm-exegesis/X86/latency/loop-register.s | 12 ++++++
.../tools/llvm-exegesis/lib/BenchmarkResult.h | 2 +
llvm/tools/llvm-exegesis/lib/SnippetFile.cpp | 20 ++++++++++
.../llvm-exegesis/lib/SnippetRepetitor.cpp | 14 +++----
.../llvm-exegesis/lib/SnippetRepetitor.h | 3 +-
llvm/tools/llvm-exegesis/lib/Target.h | 7 +++-
llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 18 +++++----
llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 37 ++++++++++++-------
.../llvm-exegesis/X86/SnippetFileTest.cpp | 19 ++++++++++
.../X86/SnippetRepetitorTest.cpp | 16 +++++---
10 files changed, 109 insertions(+), 39 deletions(-)
create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/loop-register.s
diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/loop-register.s b/llvm/test/tools/llvm-exegesis/X86/latency/loop-register.s
new file mode 100644
index 00000000000000..81ca75251381ad
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/loop-register.s
@@ -0,0 +1,12 @@
+# REQUIRES: exegesis-can-measure-latency, x86_64-linux
+
+# Test that specifying the loop register to use works as expected.
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s | FileCheck %s
+
+# CHECK: measurements:
+
+# LLVM-EXEGESIS-DEFREG R11 ff
+# LLVM-EXEGESIS-LOOP-REGISTER R12
+
+addq $0xff, %r11
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index 0aecaaeea4b2e7..4ae6bc2a54cd50 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -74,6 +74,8 @@ struct BenchmarkKey {
// The address that the snippet should be loaded in at if the execution mode
// being used supports it.
intptr_t SnippetAddress = 0;
+ // The register that should be used to hold the loop counter.
+ unsigned LoopRegister;
};
struct BenchmarkMeasure {
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp b/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp
index 7258fcb4279c7d..431d99c72b8086 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp
@@ -9,6 +9,7 @@
#include "SnippetFile.h"
#include "BenchmarkRunner.h"
#include "Error.h"
+#include "Target.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectFileInfo.h"
@@ -175,6 +176,20 @@ class BenchmarkCodeStreamer : public MCStreamer, public AsmCommentConsumer {
return;
}
+ if (CommentText.consume_front("LOOP-REGISTER")) {
+ // LLVM-EXEGESIS-LOOP-REGISTER <loop register>
+ unsigned LoopRegister;
+
+ if (!(LoopRegister = findRegisterByName(CommentText.trim()))) {
+ errs() << "unknown register '" << CommentText
+ << "' in 'LLVM-EXEGESIS-LOOP-REGISTER " << CommentText << "'\n";
+ ++InvalidComments;
+ return;
+ }
+
+ Result->Key.LoopRegister = LoopRegister;
+ return;
+ }
}
unsigned numInvalidComments() const { return InvalidComments; }
@@ -221,6 +236,11 @@ Expected<std::vector<BenchmarkCode>> readSnippets(const LLVMState &State,
BenchmarkCode Result;
+ // Ensure that there is a default loop register value specified.
+ Result.Key.LoopRegister =
+ State.getExegesisTarget().getDefaultLoopCounterRegister(
+ State.getTargetMachine().getTargetTriple());
+
const TargetMachine &TM = State.getTargetMachine();
MCContext Context(TM.getTargetTriple(), TM.getMCAsmInfo(),
TM.getMCRegisterInfo(), TM.getMCSubtargetInfo());
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
index 561687a62319b3..0bab30d1582003 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
@@ -48,10 +48,8 @@ class DuplicateSnippetRepetitor : public SnippetRepetitor {
class LoopSnippetRepetitor : public SnippetRepetitor {
public:
- explicit LoopSnippetRepetitor(const LLVMState &State)
- : SnippetRepetitor(State),
- LoopCounter(State.getExegesisTarget().getLoopCounterRegister(
- State.getTargetMachine().getTargetTriple())) {}
+ explicit LoopSnippetRepetitor(const LLVMState &State, unsigned LoopRegister)
+ : SnippetRepetitor(State), LoopCounter(LoopRegister) {}
// Loop over the snippet ceil(MinInstructions / Instructions.Size()) times.
FillFunction Repeat(ArrayRef<MCInst> Instructions, unsigned MinInstructions,
@@ -113,8 +111,8 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
(void)_;
Loop.addInstructions(Instructions);
}
- ET.decrementLoopCounterAndJump(*Loop.MBB, *Loop.MBB,
- State.getInstrInfo());
+ ET.decrementLoopCounterAndJump(*Loop.MBB, *Loop.MBB, State.getInstrInfo(),
+ LoopCounter);
// Set up the exit basic block.
Loop.MBB->addSuccessor(Exit.MBB, BranchProbability::getZero());
@@ -138,14 +136,14 @@ SnippetRepetitor::~SnippetRepetitor() {}
std::unique_ptr<const SnippetRepetitor>
SnippetRepetitor::Create(Benchmark::RepetitionModeE Mode,
- const LLVMState &State) {
+ const LLVMState &State, unsigned LoopRegister) {
switch (Mode) {
case Benchmark::Duplicate:
case Benchmark::MiddleHalfDuplicate:
return std::make_unique<DuplicateSnippetRepetitor>(State);
case Benchmark::Loop:
case Benchmark::MiddleHalfLoop:
- return std::make_unique<LoopSnippetRepetitor>(State);
+ return std::make_unique<LoopSnippetRepetitor>(State, LoopRegister);
case Benchmark::AggregateMin:
break;
}
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h
index 2b3c416c9029f7..c62e80f161f128 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h
+++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h
@@ -29,7 +29,8 @@ namespace exegesis {
class SnippetRepetitor {
public:
static std::unique_ptr<const SnippetRepetitor>
- Create(Benchmark::RepetitionModeE Mode, const LLVMState &State);
+ Create(Benchmark::RepetitionModeE Mode, const LLVMState &State,
+ unsigned LoopRegister);
virtual ~SnippetRepetitor();
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 7bbd946b03331f..522c75d15703d5 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -202,12 +202,15 @@ class ExegesisTarget {
}
// Returns a counter usable as a loop counter.
- virtual unsigned getLoopCounterRegister(const Triple &) const { return 0; }
+ virtual unsigned getDefaultLoopCounterRegister(const Triple &) const {
+ return 0;
+ }
// Adds the code to decrement the loop counter and
virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
MachineBasicBlock &TargetMBB,
- const MCInstrInfo &MII) const {
+ const MCInstrInfo &MII,
+ unsigned LoopRegister) const {
llvm_unreachable("decrementLoopCounterAndBranch() requires "
"getLoopCounterRegister() > 0");
}
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 6fc951a6e35d6a..a41a995f5560af 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -720,7 +720,7 @@ class ExegesisX86Target : public ExegesisTarget {
unsigned getScratchMemoryRegister(const Triple &TT) const override;
- unsigned getLoopCounterRegister(const Triple &) const override;
+ unsigned getDefaultLoopCounterRegister(const Triple &) const override;
unsigned getMaxMemoryAccessSize() const override { return 64; }
@@ -733,7 +733,8 @@ class ExegesisX86Target : public ExegesisTarget {
void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
MachineBasicBlock &TargetMBB,
- const MCInstrInfo &MII) const override;
+ const MCInstrInfo &MII,
+ unsigned LoopRegister) const override;
std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
const APInt &Value) const override;
@@ -852,7 +853,7 @@ const unsigned ExegesisX86Target::kUnavailableRegistersSSE[12] = {
// We're using one of R8-R15 because these registers are never hardcoded in
// instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have less
// conflicts.
-constexpr const unsigned kLoopCounterReg = X86::R8;
+constexpr const unsigned kDefaultLoopCounterReg = X86::R8;
} // namespace
@@ -870,11 +871,12 @@ unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const {
return TT.isOSWindows() ? X86::RCX : X86::RDI;
}
-unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const {
+unsigned
+ExegesisX86Target::getDefaultLoopCounterRegister(const Triple &TT) const {
if (!TT.isArch64Bit()) {
return 0;
}
- return kLoopCounterReg;
+ return kDefaultLoopCounterReg;
}
Error ExegesisX86Target::randomizeTargetMCOperand(
@@ -912,10 +914,10 @@ void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
void ExegesisX86Target::decrementLoopCounterAndJump(
MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
- const MCInstrInfo &MII) const {
+ const MCInstrInfo &MII, unsigned LoopRegister) const {
BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8))
- .addDef(kLoopCounterReg)
- .addUse(kLoopCounterReg)
+ .addDef(LoopRegister)
+ .addUse(LoopRegister)
.addImm(-1);
BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1))
.addMBB(&TargetMBB)
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 66387bdec5a5a6..76173b93c0ec3a 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -509,22 +509,42 @@ void benchmarkMain() {
}
const auto Opcodes = getOpcodesOrDie(State);
+ std::vector<BenchmarkCode> Configurations;
+
+ unsigned LoopRegister =
+ State.getExegesisTarget().getDefaultLoopCounterRegister(
+ State.getTargetMachine().getTargetTriple());
+
+ if (Opcodes.empty()) {
+ Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
+ for (const auto &Configuration : Configurations) {
+ if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
+ (Configuration.Key.MemoryMappings.size() != 0 ||
+ Configuration.Key.MemoryValues.size() != 0 ||
+ Configuration.Key.SnippetAddress != 0))
+ ExitWithError("Memory and snippet address annotations are only "
+ "supported in subprocess "
+ "execution mode");
+ }
+ LoopRegister = Configurations[0].Key.LoopRegister;
+ }
SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
- Repetitors.emplace_back(SnippetRepetitor::Create(RepetitionMode, State));
+ Repetitors.emplace_back(
+ SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
else {
for (Benchmark::RepetitionModeE RepMode :
{Benchmark::RepetitionModeE::Duplicate,
Benchmark::RepetitionModeE::Loop})
- Repetitors.emplace_back(SnippetRepetitor::Create(RepMode, State));
+ Repetitors.emplace_back(
+ SnippetRepetitor::Create(RepMode, State, LoopRegister));
}
BitVector AllReservedRegs;
for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
AllReservedRegs |= Repetitor->getReservedRegs();
- std::vector<BenchmarkCode> Configurations;
if (!Opcodes.empty()) {
for (const unsigned Opcode : Opcodes) {
// Ignore instructions without a sched class if
@@ -546,17 +566,6 @@ void benchmarkMain() {
std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
std::back_inserter(Configurations));
}
- } else {
- Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
- for (const auto &Configuration : Configurations) {
- if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
- (Configuration.Key.MemoryMappings.size() != 0 ||
- Configuration.Key.MemoryValues.size() != 0 ||
- Configuration.Key.SnippetAddress != 0))
- ExitWithError("Memory and snippet address annotations are only "
- "supported in subprocess "
- "execution mode");
- }
}
if (MinInstructions == 0) {
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp
index 505a030675f64c..f1fa891171177c 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp
@@ -219,6 +219,25 @@ TEST_F(X86SnippetFileTest, SnippetAddress) {
EXPECT_EQ(Snippet.Key.SnippetAddress, 0x10000);
}
+TEST_F(X86SnippetFileTest, LoopRegister) {
+ auto Snippets = TestCommon(R"(
+ # LLVM-EXEGESIS-LOOP-REGISTER R11
+ )");
+ ASSERT_TRUE(static_cast<bool>(Snippets));
+ EXPECT_THAT(*Snippets, SizeIs(1));
+ const auto &Snippet = (*Snippets)[0];
+ EXPECT_EQ(Snippet.Key.LoopRegister, X86::R11);
+}
+
+TEST_F(X86SnippetFileTest, LoopRegisterInvalidRegister) {
+ auto Error = TestCommon(R"(
+ # LLVM-EXEGESIS-LOOP-REGISTER INVALID
+ )")
+ .takeError();
+ EXPECT_TRUE(static_cast<bool>(Error));
+ consumeError(std::move(Error));
+}
+
} // namespace
} // namespace exegesis
} // namespace llvm
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
index 25e8836087c15d..b55ca5057ae01c 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
@@ -40,7 +40,10 @@ class X86SnippetRepetitorTest : public X86TestBase {
void TestCommon(Benchmark::RepetitionModeE RepetitionMode,
unsigned SnippetInstructions = 1) {
- const auto Repetitor = SnippetRepetitor::Create(RepetitionMode, State);
+ const auto Repetitor = SnippetRepetitor::Create(
+ RepetitionMode, State,
+ State.getExegesisTarget().getDefaultLoopCounterRegister(
+ State.getTargetMachine().getTargetTriple()));
const std::vector<MCInst> Instructions(SnippetInstructions,
MCInstBuilder(X86::NOOP));
FunctionFiller Sink(*MF, {X86::EAX});
@@ -98,11 +101,12 @@ TEST_F(X86SnippetRepetitorTest, Loop) {
HasOpcode(X86::NOOP), HasOpcode(X86::NOOP),
HasOpcode(X86::NOOP), HasOpcode(X86::ADD64ri8),
HasOpcode(X86::JCC_1)));
- EXPECT_THAT(LoopBlock.liveins(),
- UnorderedElementsAre(
- LiveReg(X86::EAX),
- LiveReg(State.getExegesisTarget().getLoopCounterRegister(
- State.getTargetMachine().getTargetTriple()))));
+ EXPECT_THAT(
+ LoopBlock.liveins(),
+ UnorderedElementsAre(
+ LiveReg(X86::EAX),
+ LiveReg(State.getExegesisTarget().getDefaultLoopCounterRegister(
+ State.getTargetMachine().getTargetTriple()))));
EXPECT_THAT(MF->getBlockNumbered(2)->instrs(),
ElementsAre(HasOpcode(X86::RET64)));
}
>From 726f4d549f31de89ad39250967133e76846bdb9c Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Tue, 27 Feb 2024 01:28:27 -0800
Subject: [PATCH 2/2] Update docs
---
llvm/docs/CommandGuide/llvm-exegesis.rst | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index 9e3c19078f1cce..fdf17c7fe41285 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -89,6 +89,14 @@ properly.
annotation requires the subprocess execution mode. This is useful in
cases where the memory accessed by the snippet depends on the location
of the snippet, like RIP-relative addressing.
+* `LLVM-EXEGESIS-LOOP-REGISTER <register name>` - This annotation specifies
+ the loop register to use for keeping track of the current iteration when
+ using the loop repetition mode. :program:`llvm-exegesis` needs to keep track
+ of the current loop iteration within the loop repetition mode in a performant
+ manner (i.e., no memory accesses), and uses a register to do this. This register
+ has an architecture-specific default (e.g., `R8` on X86), but this default might conflict
+ with some snippets. This annotation allows changing the register to prevent
+ interference between the loop index register and the snippet.
EXAMPLE 1: benchmarking instructions
------------------------------------
More information about the llvm-commits
mailing list