[llvm] [NewPM] Port x86-issue-vzero-upper (PR #180886)
Kyungtak Woo via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 21:51:45 PST 2026
https://github.com/kevinwkt updated https://github.com/llvm/llvm-project/pull/180886
>From 73f940863c8d5131d20b98cf5430bd7807d1714e Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Mon, 9 Feb 2026 19:49:36 +0000
Subject: [PATCH 1/6] Add port for x86 for issuevzeroupper
---
llvm/lib/Target/X86/CMakeLists.txt | 2 +-
llvm/lib/Target/X86/X86.h | 8 +-
...6VZeroUpper.cpp => X86IssueVZeroUpper.cpp} | 144 ++++++++----------
llvm/lib/Target/X86/X86PassRegistry.def | 2 +-
llvm/lib/Target/X86/X86TargetMachine.cpp | 2 +-
5 files changed, 73 insertions(+), 85 deletions(-)
rename llvm/lib/Target/X86/{X86VZeroUpper.cpp => X86IssueVZeroUpper.cpp} (71%)
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 5cffe98e8dc81..4ddd3d646ae2f 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -57,6 +57,7 @@ set(sources
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
X86ISelLoweringCall.cpp
+ X86IssueVZeroUpper.cpp
X86IndirectBranchTracking.cpp
X86IndirectThunks.cpp
X86InterleavedAccess.cpp
@@ -84,7 +85,6 @@ set(sources
X86TargetMachine.cpp
X86TargetObjectFile.cpp
X86TargetTransformInfo.cpp
- X86VZeroUpper.cpp
X86WinEHState.cpp
X86WinEHUnwindV2.cpp
X86InsertWait.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 8f2a852faef28..426067b47d702 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -62,7 +62,13 @@ FunctionPass *createX86FPStackifierLegacyPass();
/// This pass inserts AVX vzeroupper instructions before each call to avoid
/// transition penalty between functions encoded with AVX and SSE.
-FunctionPass *createX86IssueVZeroUpperPass();
+class X86IssueVZeroUpperPass : public PassInfoMixin<X86IssueVZeroUpperPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+
+FunctionPass *createX86IssueVZeroUpperLegacyPass();
/// This pass inserts ENDBR instructions before indirect jump/call
/// destinations as part of CET IBT mechanism.
diff --git a/llvm/lib/Target/X86/X86VZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
similarity index 71%
rename from llvm/lib/Target/X86/X86VZeroUpper.cpp
rename to llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index 2f28ab36aa193..5ad4552fcc565 100644
--- a/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -1,4 +1,4 @@
-//===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
+//===- X86VIssueZeroUpper.cpp - AVX vzeroupper instruction inserter -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -24,9 +24,11 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -37,7 +39,7 @@
using namespace llvm;
-#define DEBUG_TYPE "x86-vzeroupper"
+#define DEBUG_TYPE "x86-issue-vzero-upper"
static cl::opt<bool>
UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
@@ -47,77 +49,40 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
namespace {
+class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
+public:
+ static char ID;
- class VZeroUpperInserter : public MachineFunctionPass {
- public:
- VZeroUpperInserter() : MachineFunctionPass(ID) {}
+ X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().setNoVRegs();
- }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setNoVRegs();
+ }
+};
- StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
-
- private:
- void processBasicBlock(MachineBasicBlock &MBB);
- void insertVZeroUpper(MachineBasicBlock::iterator I,
- MachineBasicBlock &MBB);
- void addDirtySuccessor(MachineBasicBlock &MBB);
-
- enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
-
- static const char* getBlockExitStateName(BlockExitState ST);
-
- // Core algorithm state:
- // BlockState - Each block is either:
- // - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
- // vzeroupper instructions in this block.
- // - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
- // block that will ensure that YMM/ZMM is clean on exit.
- // - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
- // subsequent vzeroupper in the block clears it.
- //
- // AddedToDirtySuccessors - This flag is raised when a block is added to the
- // DirtySuccessors list to ensure that it's not
- // added multiple times.
- //
- // FirstUnguardedCall - Records the location of the first unguarded call in
- // each basic block that may need to be guarded by a
- // vzeroupper. We won't know whether it actually needs
- // to be guarded until we discover a predecessor that
- // is DIRTY_OUT.
- struct BlockState {
- BlockExitState ExitState = PASS_THROUGH;
- bool AddedToDirtySuccessors = false;
- MachineBasicBlock::iterator FirstUnguardedCall;
-
- BlockState() = default;
- };
-
- using BlockStateMap = SmallVector<BlockState, 8>;
- using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
-
- BlockStateMap BlockStates;
- DirtySuccessorsWorkList DirtySuccessors;
- bool EverMadeChange;
- bool IsX86INTR;
- const TargetInstrInfo *TII;
-
- static char ID;
- };
+enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
+struct BlockState {
+ BlockExitState ExitState = PASS_THROUGH;
+ bool AddedToDirtySuccessors = false;
+ MachineBasicBlock::iterator FirstUnguardedCall;
+
+ BlockState() = default;
+};
} // end anonymous namespace
-char VZeroUpperInserter::ID = 0;
+char X86IssueVZeroUpperLegacy::ID = 0;
-FunctionPass *llvm::createX86IssueVZeroUpperPass() {
- return new VZeroUpperInserter();
+FunctionPass *llvm::createX86IssueVZeroUpperLegacyPass() {
+ return new X86IssueVZeroUpperLegacy();
}
#ifndef NDEBUG
-const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
+static const char* getBlockExitStateName(BlockExitState ST) {
switch (ST) {
case PASS_THROUGH: return "Pass-through";
case EXITS_DIRTY: return "Exits-dirty";
@@ -179,15 +144,18 @@ static bool callHasRegMask(MachineInstr &MI) {
}
/// Insert a vzeroupper instruction before I.
-void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
- MachineBasicBlock &MBB) {
+static void insertVZeroUpper(MachineBasicBlock::iterator I,
+ MachineBasicBlock &MBB, const TargetInstrInfo *TII,
+ bool &EverMadeChange) {
BuildMI(MBB, I, I->getDebugLoc(), TII->get(X86::VZEROUPPER));
++NumVZU;
EverMadeChange = true;
}
/// Add MBB to the DirtySuccessors list if it hasn't already been added.
-void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
+static void addDirtySuccessor(MachineBasicBlock &MBB,
+ SmallVectorImpl<BlockState> &BlockStates,
+ SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors) {
if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
DirtySuccessors.push_back(&MBB);
BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
@@ -196,7 +164,11 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
/// Loop over all of the instructions in the basic block, inserting vzeroupper
/// instructions before function calls.
-void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
+static void processBasicBlock(MachineBasicBlock &MBB,
+ SmallVectorImpl<BlockState> &BlockStates,
+ SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors,
+ bool IsX86INTR, const TargetInstrInfo *TII,
+ bool &EverMadeChange) {
// Start by assuming that the block is PASS_THROUGH which implies no unguarded
// calls.
BlockExitState CurState = PASS_THROUGH;
@@ -253,7 +225,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// After the inserted VZEROUPPER the state becomes clean again, but
// other YMM/ZMM may appear before other subsequent calls or even before
// the end of the BB.
- insertVZeroUpper(MI, MBB);
+ insertVZeroUpper(MI, MBB, TII, EverMadeChange);
CurState = EXITS_CLEAN;
} else if (CurState == PASS_THROUGH) {
// If this block is currently in pass-through state and we encounter a
@@ -271,24 +243,20 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
if (CurState == EXITS_DIRTY)
for (MachineBasicBlock *Succ : MBB.successors())
- addDirtySuccessor(*Succ);
+ addDirtySuccessor(*Succ, BlockStates, DirtySuccessors);
BlockStates[MBB.getNumber()].ExitState = CurState;
}
-/// Loop over all of the basic blocks, inserting vzeroupper instructions before
-/// function calls.
-bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
+static bool issueVZeroUpper(MachineFunction &MF) {
if (!UseVZeroUpper)
return false;
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
if (!ST.hasAVX() || !ST.insertVZEROUPPER())
return false;
- TII = ST.getInstrInfo();
+
MachineRegisterInfo &MRI = MF.getRegInfo();
- EverMadeChange = false;
- IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
@@ -309,20 +277,25 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
if (!YmmOrZmmUsed)
return false;
- assert(BlockStates.empty() && DirtySuccessors.empty() &&
+ const TargetInstrInfo *TII = ST.getInstrInfo();
+ bool IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
+ bool EverMadeChange = false;
+ SmallVector<BlockState, 8> BlockStates(MF.getNumBlockIDs());
+ SmallVector<MachineBasicBlock *, 8> DirtySuccessors;
+
+ assert(BlockStates.size() == MF.getNumBlockIDs() && DirtySuccessors.empty() &&
"X86VZeroUpper state should be clear");
- BlockStates.resize(MF.getNumBlockIDs());
// Process all blocks. This will compute block exit states, record the first
// unguarded call in each block, and add successors of dirty blocks to the
// DirtySuccessors list.
for (MachineBasicBlock &MBB : MF)
- processBasicBlock(MBB);
+ processBasicBlock(MBB, BlockStates, DirtySuccessors, IsX86INTR, TII, EverMadeChange);
// If any YMM/ZMM regs are live-in to this function, add the entry block to
// the DirtySuccessors list
if (FnHasLiveInYmmOrZmm)
- addDirtySuccessor(MF.front());
+ addDirtySuccessor(MF.front(), BlockStates, DirtySuccessors);
// Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
// vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
@@ -335,7 +308,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
// MBB is a successor of a dirty block, so its first call needs to be
// guarded.
if (BBState.FirstUnguardedCall != MBB.end())
- insertVZeroUpper(BBState.FirstUnguardedCall, MBB);
+ insertVZeroUpper(BBState.FirstUnguardedCall, MBB, TII, EverMadeChange);
// If this successor was a pass-through block, then it is now dirty. Its
// successors need to be added to the worklist (if they haven't been
@@ -344,10 +317,19 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber()
<< " was Pass-through, is now Dirty-out.\n");
for (MachineBasicBlock *Succ : MBB.successors())
- addDirtySuccessor(*Succ);
+ addDirtySuccessor(*Succ, BlockStates, DirtySuccessors);
}
}
- BlockStates.clear();
return EverMadeChange;
}
+
+bool X86IssueVZeroUpperLegacy::runOnMachineFunction(MachineFunction &MF) {
+ return issueVZeroUpper(MF);
+}
+
+PreservedAnalyses
+X86IssueVZeroUpperPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ return issueVZeroUpper(MF) ? getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 91156260d2c7c..9bbc057f48506 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -50,6 +50,7 @@ MACHINE_FUNCTION_PASS("x86-flags-copy-lowering", X86FlagsCopyLoweringPass())
MACHINE_FUNCTION_PASS("x86-fp-stackifier", X86FPStackifierPass())
MACHINE_FUNCTION_PASS("x86-indirect-branch-tracking", X86IndirectBranchTrackingPass())
MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this))
+MACHINE_FUNCTION_PASS("x86-issue-vzero-upper", X86IssueVZeroUpperPass())
MACHINE_FUNCTION_PASS("x86-lower-tile-copy", X86LowerTileCopyPass())
MACHINE_FUNCTION_PASS("x86-lvi-load", X86LoadValueInjectionLoadHardeningPass())
MACHINE_FUNCTION_PASS("x86-lvi-ret", X86LoadValueInjectionRetHardeningPass())
@@ -70,6 +71,5 @@ DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix())
DUMMY_MACHINE_FUNCTION_PASS("x86-global-base-reg", X86GlobalBaseRegPass())
DUMMY_MACHINE_FUNCTION_PASS("x86-indirect-thunks", X86IndirectThunks())
DUMMY_MACHINE_FUNCTION_PASS("x86-insert-x87-wait", X86InsertX87WaitPass())
-DUMMY_MACHINE_FUNCTION_PASS("x86-issue-vzero-upper", X86IssueVZeroUpperPass())
DUMMY_MACHINE_FUNCTION_PASS("x86-pad-short-functions", X86PadShortFunctionsPass())
#undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 95ad484d04ccd..b27815caabcf6 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -568,7 +568,7 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86IndirectBranchTrackingLegacyPass());
- addPass(createX86IssueVZeroUpperPass());
+ addPass(createX86IssueVZeroUpperLegacyPass());
if (getOptLevel() != CodeGenOptLevel::None) {
addPass(createX86FixupBWInstsLegacyPass());
>From 79cff18ed0a9eb52d3ebc73ebb63aa2912b12ce5 Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 04:33:55 +0000
Subject: [PATCH 2/6] test
---
llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp | 3 +-
llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 77 ++++++++++++-------
llvm/test/CodeGen/X86/llc-pipeline-npm.ll | 4 +
3 files changed, 53 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
index 53171564ce1ad..b10028f8323be 100644
--- a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
@@ -173,8 +173,7 @@ void X86CodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
}
addMachineFunctionPass(X86IndirectBranchTrackingPass(), PMW);
- // TODO(boomanaiden154): Add X86IssueVZeroUpperPass here once it has been
- // ported.
+ addMachineFunctionPass(X86IssueVZeroUpperPass(), PMW);
if (getOptLevel() != CodeGenOptLevel::None) {
addMachineFunctionPass(X86FixupBWInstsPass(), PMW);
diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index 5ad4552fcc565..ca05455a5980d 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -1,4 +1,4 @@
-//===- X86VIssueZeroUpper.cpp - AVX vzeroupper instruction inserter -------===//
+//===- X86IssueVZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -24,11 +24,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/Analysis.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -49,30 +47,52 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
namespace {
-class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
-public:
- static char ID;
- X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
+ class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
+ public:
+ static char ID;
+ X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
- StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
+ StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().setNoVRegs();
- }
-};
-
-enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
-
-struct BlockState {
- BlockExitState ExitState = PASS_THROUGH;
- bool AddedToDirtySuccessors = false;
- MachineBasicBlock::iterator FirstUnguardedCall;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setNoVRegs();
+ }
+ };
+
+ enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
+
+ // Core algorithm state:
+ // BlockState - Each block is either:
+ // - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
+ // vzeroupper instructions in this block.
+ // - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
+ // block that will ensure that YMM/ZMM is clean on exit.
+ // - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
+ // subsequent vzeroupper in the block clears it.
+ //
+ // AddedToDirtySuccessors - This flag is raised when a block is added to the
+ // DirtySuccessors list to ensure that it's not
+ // added multiple times.
+ //
+ // FirstUnguardedCall - Records the location of the first unguarded call in
+ // each basic block that may need to be guarded by a
+ // vzeroupper. We won't know whether it actually needs
+ // to be guarded until we discover a predecessor that
+ // is DIRTY_OUT.
+ struct BlockState {
+ BlockExitState ExitState = PASS_THROUGH;
+ bool AddedToDirtySuccessors = false;
+ MachineBasicBlock::iterator FirstUnguardedCall;
+
+ BlockState() = default;
+ };
+
+ using BlockStateMap = SmallVector<BlockState, 8>;
+ using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
- BlockState() = default;
-};
} // end anonymous namespace
char X86IssueVZeroUpperLegacy::ID = 0;
@@ -154,8 +174,8 @@ static void insertVZeroUpper(MachineBasicBlock::iterator I,
/// Add MBB to the DirtySuccessors list if it hasn't already been added.
static void addDirtySuccessor(MachineBasicBlock &MBB,
- SmallVectorImpl<BlockState> &BlockStates,
- SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors) {
+ BlockStateMap &BlockStates,
+ DirtySuccessorsWorkList &DirtySuccessors) {
if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
DirtySuccessors.push_back(&MBB);
BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
@@ -165,8 +185,8 @@ static void addDirtySuccessor(MachineBasicBlock &MBB,
/// Loop over all of the instructions in the basic block, inserting vzeroupper
/// instructions before function calls.
static void processBasicBlock(MachineBasicBlock &MBB,
- SmallVectorImpl<BlockState> &BlockStates,
- SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors,
+ BlockStateMap &BlockStates,
+ DirtySuccessorsWorkList &DirtySuccessors,
bool IsX86INTR, const TargetInstrInfo *TII,
bool &EverMadeChange) {
// Start by assuming that the block is PASS_THROUGH which implies no unguarded
@@ -257,7 +277,6 @@ static bool issueVZeroUpper(MachineFunction &MF) {
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
-
bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
// Fast check: if the function doesn't use any ymm/zmm registers, we don't
@@ -280,8 +299,8 @@ static bool issueVZeroUpper(MachineFunction &MF) {
const TargetInstrInfo *TII = ST.getInstrInfo();
bool IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
bool EverMadeChange = false;
- SmallVector<BlockState, 8> BlockStates(MF.getNumBlockIDs());
- SmallVector<MachineBasicBlock *, 8> DirtySuccessors;
+ BlockStateMap BlockStates(MF.getNumBlockIDs());
+ DirtySuccessorsWorkList DirtySuccessors;
assert(BlockStates.size() == MF.getNumBlockIDs() && DirtySuccessors.empty() &&
"X86VZeroUpper state should be clear");
diff --git a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
index 37c76babfc5ff..25e24c33b175a 100644
--- a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
@@ -57,6 +57,7 @@
; O0-NEXT: xray-instrumentation
; O0-NEXT: patchable-function
; O0-NEXT: x86-indirect-branch-tracking
+; O0-NEXT: x86-issue-vzero-upper
; O0-NEXT: x86-compress-evex
; O0-NEXT: FuncletLayoutPass
; O0-NEXT: remove-loads-into-fake-uses
@@ -172,6 +173,7 @@
; O2-NEXT: x86-fixup-leas
; O2-NEXT: x86-fixup-inst-tuning
; O2-NEXT: x86-fixup-inst-tuning
+; O2-NEXT: x86-issue-vzero-upper
; O2-NEXT: x86-compress-evex
; O2-NEXT: FuncletLayoutPass
; O2-NEXT: remove-loads-into-fake-uses
@@ -234,6 +236,7 @@
; O0-WINDOWS-NEXT: xray-instrumentation
; O0-WINDOWS-NEXT: patchable-function
; O0-WINDOWS-NEXT: x86-indirect-branch-tracking
+; O0-WINDOWS-NEXT: x86-issue-vzero-upper
; O0-WINDOWS-NEXT: x86-compress-evex
; O0-WINDOWS-NEXT: FuncletLayoutPass
; O0-WINDOWS-NEXT: remove-loads-into-fake-uses
@@ -352,6 +355,7 @@
; O3-WINDOWS-NEXT: x86-fixup-leas
; O3-WINDOWS-NEXT: x86-fixup-inst-tuning
; O3-WINDOWS-NEXT: x86-fixup-inst-tuning
+; O3-WINDOWS-NEXT: x86-issue-vzero-upper
; O3-WINDOWS-NEXT: x86-compress-evex
; O3-WINDOWS-NEXT: FuncletLayoutPass
; O3-WINDOWS-NEXT: remove-loads-into-fake-uses
>From e0586b39128d07c99a8db631b160aea98bdb4fc2 Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 04:54:06 +0000
Subject: [PATCH 3/6] update test files along with more diffs
---
llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 5 +++++
llvm/test/CodeGen/X86/llc-pipeline-npm.ll | 4 ++--
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index ca05455a5980d..58890bc968a45 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -24,9 +24,11 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -268,6 +270,8 @@ static void processBasicBlock(MachineBasicBlock &MBB,
BlockStates[MBB.getNumber()].ExitState = CurState;
}
+/// Loop over all of the basic blocks, inserting vzeroupper instructions before
+/// function calls.
static bool issueVZeroUpper(MachineFunction &MF) {
if (!UseVZeroUpper)
return false;
@@ -277,6 +281,7 @@ static bool issueVZeroUpper(MachineFunction &MF) {
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
+
bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
// Fast check: if the function doesn't use any ymm/zmm registers, we don't
diff --git a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
index 25e24c33b175a..513d65ab1a4b6 100644
--- a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
@@ -169,11 +169,11 @@
; O2-NEXT: patchable-function
; O2-NEXT: BreakFalseDepsPass
; O2-NEXT: x86-indirect-branch-tracking
+; O2-NEXT: x86-issue-vzero-upper
; O2-NEXT: x86-fixup-bw-insts
; O2-NEXT: x86-fixup-leas
; O2-NEXT: x86-fixup-inst-tuning
; O2-NEXT: x86-fixup-inst-tuning
-; O2-NEXT: x86-issue-vzero-upper
; O2-NEXT: x86-compress-evex
; O2-NEXT: FuncletLayoutPass
; O2-NEXT: remove-loads-into-fake-uses
@@ -351,11 +351,11 @@
; O3-WINDOWS-NEXT: patchable-function
; O3-WINDOWS-NEXT: BreakFalseDepsPass
; O3-WINDOWS-NEXT: x86-indirect-branch-tracking
+; O3-WINDOWS-NEXT: x86-issue-vzero-upper
; O3-WINDOWS-NEXT: x86-fixup-bw-insts
; O3-WINDOWS-NEXT: x86-fixup-leas
; O3-WINDOWS-NEXT: x86-fixup-inst-tuning
; O3-WINDOWS-NEXT: x86-fixup-inst-tuning
-; O3-WINDOWS-NEXT: x86-issue-vzero-upper
; O3-WINDOWS-NEXT: x86-compress-evex
; O3-WINDOWS-NEXT: FuncletLayoutPass
; O3-WINDOWS-NEXT: remove-loads-into-fake-uses
>From f250815715e82e1d7f7484aa8861652ceacdab5e Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 04:59:49 +0000
Subject: [PATCH 4/6] undo namespace usage
---
llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index 58890bc968a45..ff60522b847b4 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -91,10 +91,6 @@ namespace {
BlockState() = default;
};
-
- using BlockStateMap = SmallVector<BlockState, 8>;
- using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
-
} // end anonymous namespace
char X86IssueVZeroUpperLegacy::ID = 0;
@@ -176,8 +172,8 @@ static void insertVZeroUpper(MachineBasicBlock::iterator I,
/// Add MBB to the DirtySuccessors list if it hasn't already been added.
static void addDirtySuccessor(MachineBasicBlock &MBB,
- BlockStateMap &BlockStates,
- DirtySuccessorsWorkList &DirtySuccessors) {
+ SmallVectorImpl<BlockState> &BlockStates,
+ SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors) {
if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
DirtySuccessors.push_back(&MBB);
BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
@@ -187,8 +183,8 @@ static void addDirtySuccessor(MachineBasicBlock &MBB,
/// Loop over all of the instructions in the basic block, inserting vzeroupper
/// instructions before function calls.
static void processBasicBlock(MachineBasicBlock &MBB,
- BlockStateMap &BlockStates,
- DirtySuccessorsWorkList &DirtySuccessors,
+ SmallVectorImpl<BlockState> &BlockStates,
+ SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors,
bool IsX86INTR, const TargetInstrInfo *TII,
bool &EverMadeChange) {
// Start by assuming that the block is PASS_THROUGH which implies no unguarded
@@ -304,8 +300,8 @@ static bool issueVZeroUpper(MachineFunction &MF) {
const TargetInstrInfo *TII = ST.getInstrInfo();
bool IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
bool EverMadeChange = false;
- BlockStateMap BlockStates(MF.getNumBlockIDs());
- DirtySuccessorsWorkList DirtySuccessors;
+ SmallVector<BlockState, 8> BlockStates(MF.getNumBlockIDs());
+ SmallVector<MachineBasicBlock *, 8> DirtySuccessors;
assert(BlockStates.size() == MF.getNumBlockIDs() && DirtySuccessors.empty() &&
"X86VZeroUpper state should be clear");
>From f48711125078dc880ffd49f8da04da716b9df721 Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 05:28:06 +0000
Subject: [PATCH 5/6] undo undo namespace usage
---
llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index ff60522b847b4..5780ac373619a 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -91,6 +91,9 @@ namespace {
BlockState() = default;
};
+
+ using BlockStateMap = SmallVector<BlockState, 8>;
+ using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
} // end anonymous namespace
char X86IssueVZeroUpperLegacy::ID = 0;
@@ -172,8 +175,8 @@ static void insertVZeroUpper(MachineBasicBlock::iterator I,
/// Add MBB to the DirtySuccessors list if it hasn't already been added.
static void addDirtySuccessor(MachineBasicBlock &MBB,
- SmallVectorImpl<BlockState> &BlockStates,
- SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors) {
+ BlockStateMap &BlockStates,
+ DirtySuccessorsWorkList &DirtySuccessors) {
if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
DirtySuccessors.push_back(&MBB);
BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
@@ -183,8 +186,8 @@ static void addDirtySuccessor(MachineBasicBlock &MBB,
/// Loop over all of the instructions in the basic block, inserting vzeroupper
/// instructions before function calls.
static void processBasicBlock(MachineBasicBlock &MBB,
- SmallVectorImpl<BlockState> &BlockStates,
- SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors,
+ BlockStateMap &BlockStates,
+ DirtySuccessorsWorkList &DirtySuccessors,
bool IsX86INTR, const TargetInstrInfo *TII,
bool &EverMadeChange) {
// Start by assuming that the block is PASS_THROUGH which implies no unguarded
@@ -300,8 +303,8 @@ static bool issueVZeroUpper(MachineFunction &MF) {
const TargetInstrInfo *TII = ST.getInstrInfo();
bool IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
bool EverMadeChange = false;
- SmallVector<BlockState, 8> BlockStates(MF.getNumBlockIDs());
- SmallVector<MachineBasicBlock *, 8> DirtySuccessors;
+ BlockStateMap BlockStates(MF.getNumBlockIDs());
+ DirtySuccessorsWorkList DirtySuccessors;
assert(BlockStates.size() == MF.getNumBlockIDs() && DirtySuccessors.empty() &&
"X86VZeroUpper state should be clear");
>From 45958e776633c80a47074f574771bbdefe438e03 Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 05:51:25 +0000
Subject: [PATCH 6/6] clang fmt
---
llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 109 +++++++++++----------
1 file changed, 58 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index 5780ac373619a..26b91516c0e41 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -1,4 +1,5 @@
-//===- X86IssueVZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
+//===- X86IssueVZeroUpper.cpp - AVX vzeroupper instruction inserter
+//------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -42,58 +43,58 @@ using namespace llvm;
#define DEBUG_TYPE "x86-issue-vzero-upper"
static cl::opt<bool>
-UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
- cl::desc("Minimize AVX to SSE transition penalty"),
- cl::init(true));
+ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
+ cl::desc("Minimize AVX to SSE transition penalty"),
+ cl::init(true));
STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
namespace {
- class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
- public:
- static char ID;
- X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
+class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+ X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
- StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
+ StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().setNoVRegs();
- }
- };
-
- enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
-
- // Core algorithm state:
- // BlockState - Each block is either:
- // - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
- // vzeroupper instructions in this block.
- // - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
- // block that will ensure that YMM/ZMM is clean on exit.
- // - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
- // subsequent vzeroupper in the block clears it.
- //
- // AddedToDirtySuccessors - This flag is raised when a block is added to the
- // DirtySuccessors list to ensure that it's not
- // added multiple times.
- //
- // FirstUnguardedCall - Records the location of the first unguarded call in
- // each basic block that may need to be guarded by a
- // vzeroupper. We won't know whether it actually needs
- // to be guarded until we discover a predecessor that
- // is DIRTY_OUT.
- struct BlockState {
- BlockExitState ExitState = PASS_THROUGH;
- bool AddedToDirtySuccessors = false;
- MachineBasicBlock::iterator FirstUnguardedCall;
-
- BlockState() = default;
- };
-
- using BlockStateMap = SmallVector<BlockState, 8>;
- using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setNoVRegs();
+ }
+};
+
+enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
+
+// Core algorithm state:
+// BlockState - Each block is either:
+// - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
+// vzeroupper instructions in this block.
+// - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
+// block that will ensure that YMM/ZMM is clean on exit.
+// - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
+// subsequent vzeroupper in the block clears it.
+//
+// AddedToDirtySuccessors - This flag is raised when a block is added to the
+// DirtySuccessors list to ensure that it's not
+// added multiple times.
+//
+// FirstUnguardedCall - Records the location of the first unguarded call in
+// each basic block that may need to be guarded by a
+// vzeroupper. We won't know whether it actually needs
+// to be guarded until we discover a predecessor that
+// is EXITS_DIRTY.
+struct BlockState {
+ BlockExitState ExitState = PASS_THROUGH;
+ bool AddedToDirtySuccessors = false;
+ MachineBasicBlock::iterator FirstUnguardedCall;
+
+ BlockState() = default;
+};
+
+using BlockStateMap = SmallVector<BlockState, 8>;
+using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
} // end anonymous namespace
char X86IssueVZeroUpperLegacy::ID = 0;
@@ -103,11 +104,14 @@ FunctionPass *llvm::createX86IssueVZeroUpperLegacyPass() {
}
#ifndef NDEBUG
-static const char* getBlockExitStateName(BlockExitState ST) {
+static const char *getBlockExitStateName(BlockExitState ST) {
switch (ST) {
- case PASS_THROUGH: return "Pass-through";
- case EXITS_DIRTY: return "Exits-dirty";
- case EXITS_CLEAN: return "Exits-clean";
+ case PASS_THROUGH:
+ return "Pass-through";
+ case EXITS_DIRTY:
+ return "Exits-dirty";
+ case EXITS_CLEAN:
+ return "Exits-clean";
}
llvm_unreachable("Invalid block exit state.");
}
@@ -313,7 +317,8 @@ static bool issueVZeroUpper(MachineFunction &MF) {
// unguarded call in each block, and add successors of dirty blocks to the
// DirtySuccessors list.
for (MachineBasicBlock &MBB : MF)
- processBasicBlock(MBB, BlockStates, DirtySuccessors, IsX86INTR, TII, EverMadeChange);
+ processBasicBlock(MBB, BlockStates, DirtySuccessors, IsX86INTR, TII,
+ EverMadeChange);
// If any YMM/ZMM regs are live-in to this function, add the entry block to
// the DirtySuccessors list
@@ -354,5 +359,7 @@ bool X86IssueVZeroUpperLegacy::runOnMachineFunction(MachineFunction &MF) {
PreservedAnalyses
X86IssueVZeroUpperPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
- return issueVZeroUpper(MF) ? getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>() : PreservedAnalyses::all();
+ return issueVZeroUpper(MF) ? getMachineFunctionPassPreservedAnalyses()
+ .preserveSet<CFGAnalyses>()
+ : PreservedAnalyses::all();
}
More information about the llvm-commits
mailing list