[llvm] [X86] X86LowerTileCopy - Find dead register to use to prevent save-reload (PR #83628)
Kirill Stoimenov via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 14 07:58:41 PDT 2024
https://github.com/kstoimenov updated https://github.com/llvm/llvm-project/pull/83628
>From 10f95c2fccd25ba158c663747f4b9d76e5590d87 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Fri, 1 Mar 2024 17:47:18 -0500
Subject: [PATCH] Find dead register to use to prevent save-reload
---
llvm/lib/Target/X86/X86LowerTileCopy.cpp | 63 ++++++++++++++++++------
1 file changed, 48 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index e7afc49240e547..95fb5c19920219 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -20,6 +20,7 @@
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -90,22 +91,52 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
unsigned Size = TRI->getSpillSize(X86::TILERegClass);
Align Alignment = TRI->getSpillAlign(X86::TILERegClass);
int TileSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
- // Allocate stack slot for stride register
- Size = TRI->getSpillSize(X86::GR64RegClass);
- Alignment = TRI->getSpillAlign(X86::GR64RegClass);
- int StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
- // TODO: Pick a killed regiter to avoid save/reload. There is problem
+ int StrideSS = 0;
+
+ LiveRegUnits UsedRegs(*TRI);
+ UsedRegs.addLiveOuts(MBB);
+
+ auto InstUpToMI = MBB.end();
+ while (InstUpToMI != MI)
+ // The pre-decrement is on purpose here.
+ // We want to have the liveness right before MI.
+ UsedRegs.stepBackward(*--InstUpToMI);
+
+ // Look for a temporary register to use.
+ BitVector GR64Regs =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(X86::GR64RegClassID));
+ // Pick a killed register to avoid save/reload. There is a problem
// to get live interval in this stage.
Register GR64Cand = X86::RAX;
+ // Find the first allocatable GR64 register that is available at this point
+ bool found = false;
+ for (auto RegT : GR64Regs.set_bits()) {
+ if (UsedRegs.available(RegT)) {
+ GR64Cand = RegT;
+ break;
+ }
+ }
+
const DebugLoc &DL = MI.getDebugLoc();
- // mov %rax (%sp)
- BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)), StrideSS)
- .addReg(GR64Cand);
- // mov 64 %rax
- BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
+ if (found) {
+ // mov 64 %reg
+ BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
+ } else {
+ // Allocate stack slot for stride register
+ Size = TRI->getSpillSize(X86::GR64RegClass);
+ Alignment = TRI->getSpillAlign(X86::GR64RegClass);
+ StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
+
+ // mov %reg (%sp)
+ BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)),
+ StrideSS)
+ .addReg(GR64Cand);
+ // mov 64 %reg
+ BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
+ }
// tilestored %tmm, (%sp, %idx)
#define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
@@ -120,10 +151,12 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
#undef GET_EGPR_IF_ENABLED
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
TileSS);
- // restore %rax
- // mov (%sp) %rax
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand),
- StrideSS);
+ if (!found) {
+ // restore %rax
+ // mov (%sp) %rax
+ addFrameReference(
+ BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand), StrideSS);
+ }
MI.eraseFromParent();
Changed = true;
}
More information about the llvm-commits mailing list