[llvm] [NewPM] Port x86-issue-vzero-upper (PR #180886)

Kyungtak Woo via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 10 21:51:45 PST 2026


https://github.com/kevinwkt updated https://github.com/llvm/llvm-project/pull/180886

>From 73f940863c8d5131d20b98cf5430bd7807d1714e Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Mon, 9 Feb 2026 19:49:36 +0000
Subject: [PATCH 1/6] Add port for x86 for issuevzeroupper

---
 llvm/lib/Target/X86/CMakeLists.txt            |   2 +-
 llvm/lib/Target/X86/X86.h                     |   8 +-
 ...6VZeroUpper.cpp => X86IssueVZeroUpper.cpp} | 144 ++++++++----------
 llvm/lib/Target/X86/X86PassRegistry.def       |   2 +-
 llvm/lib/Target/X86/X86TargetMachine.cpp      |   2 +-
 5 files changed, 73 insertions(+), 85 deletions(-)
 rename llvm/lib/Target/X86/{X86VZeroUpper.cpp => X86IssueVZeroUpper.cpp} (71%)

diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 5cffe98e8dc81..4ddd3d646ae2f 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -57,6 +57,7 @@ set(sources
   X86ISelDAGToDAG.cpp
   X86ISelLowering.cpp
   X86ISelLoweringCall.cpp
+  X86IssueVZeroUpper.cpp
   X86IndirectBranchTracking.cpp
   X86IndirectThunks.cpp
   X86InterleavedAccess.cpp
@@ -84,7 +85,6 @@ set(sources
   X86TargetMachine.cpp
   X86TargetObjectFile.cpp
   X86TargetTransformInfo.cpp
-  X86VZeroUpper.cpp
   X86WinEHState.cpp
   X86WinEHUnwindV2.cpp
   X86InsertWait.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 8f2a852faef28..426067b47d702 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -62,7 +62,13 @@ FunctionPass *createX86FPStackifierLegacyPass();
 
 /// This pass inserts AVX vzeroupper instructions before each call to avoid
 /// transition penalty between functions encoded with AVX and SSE.
-FunctionPass *createX86IssueVZeroUpperPass();
+class X86IssueVZeroUpperPass : public PassInfoMixin<X86IssueVZeroUpperPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+FunctionPass *createX86IssueVZeroUpperLegacyPass();
 
 /// This pass inserts ENDBR instructions before indirect jump/call
 /// destinations as part of CET IBT mechanism.
diff --git a/llvm/lib/Target/X86/X86VZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
similarity index 71%
rename from llvm/lib/Target/X86/X86VZeroUpper.cpp
rename to llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index 2f28ab36aa193..5ad4552fcc565 100644
--- a/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -1,4 +1,4 @@
-//===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
+//===- X86VIssueZeroUpper.cpp - AVX vzeroupper instruction inserter -------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -24,9 +24,11 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Analysis.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
@@ -37,7 +39,7 @@
 
 using namespace llvm;
 
-#define DEBUG_TYPE "x86-vzeroupper"
+#define DEBUG_TYPE "x86-issue-vzero-upper"
 
 static cl::opt<bool>
 UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
@@ -47,77 +49,40 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
 STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
 
 namespace {
+class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
+public:
+  static char ID;
 
-  class VZeroUpperInserter : public MachineFunctionPass {
-  public:
-    VZeroUpperInserter() : MachineFunctionPass(ID) {}
+  X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
 
-    bool runOnMachineFunction(MachineFunction &MF) override;
+  StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
 
-    MachineFunctionProperties getRequiredProperties() const override {
-      return MachineFunctionProperties().setNoVRegs();
-    }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().setNoVRegs();
+  }
+};
 
-    StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
-
-  private:
-    void processBasicBlock(MachineBasicBlock &MBB);
-    void insertVZeroUpper(MachineBasicBlock::iterator I,
-                          MachineBasicBlock &MBB);
-    void addDirtySuccessor(MachineBasicBlock &MBB);
-
-    enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
-
-    static const char* getBlockExitStateName(BlockExitState ST);
-
-    // Core algorithm state:
-    // BlockState - Each block is either:
-    //   - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
-    //                   vzeroupper instructions in this block.
-    //   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
-    //                  block that will ensure that YMM/ZMM is clean on exit.
-    //   - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
-    //                  subsequent vzeroupper in the block clears it.
-    //
-    // AddedToDirtySuccessors - This flag is raised when a block is added to the
-    //                          DirtySuccessors list to ensure that it's not
-    //                          added multiple times.
-    //
-    // FirstUnguardedCall - Records the location of the first unguarded call in
-    //                      each basic block that may need to be guarded by a
-    //                      vzeroupper. We won't know whether it actually needs
-    //                      to be guarded until we discover a predecessor that
-    //                      is DIRTY_OUT.
-    struct BlockState {
-      BlockExitState ExitState = PASS_THROUGH;
-      bool AddedToDirtySuccessors = false;
-      MachineBasicBlock::iterator FirstUnguardedCall;
-
-      BlockState() = default;
-    };
-
-    using BlockStateMap = SmallVector<BlockState, 8>;
-    using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
-
-    BlockStateMap BlockStates;
-    DirtySuccessorsWorkList DirtySuccessors;
-    bool EverMadeChange;
-    bool IsX86INTR;
-    const TargetInstrInfo *TII;
-
-    static char ID;
-  };
+enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
 
+struct BlockState {
+  BlockExitState ExitState = PASS_THROUGH;
+  bool AddedToDirtySuccessors = false;
+  MachineBasicBlock::iterator FirstUnguardedCall;
+
+  BlockState() = default;
+};
 } // end anonymous namespace
 
-char VZeroUpperInserter::ID = 0;
+char X86IssueVZeroUpperLegacy::ID = 0;
 
-FunctionPass *llvm::createX86IssueVZeroUpperPass() {
-  return new VZeroUpperInserter();
+FunctionPass *llvm::createX86IssueVZeroUpperLegacyPass() {
+  return new X86IssueVZeroUpperLegacy();
 }
 
 #ifndef NDEBUG
-const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
+static const char* getBlockExitStateName(BlockExitState ST) {
   switch (ST) {
     case PASS_THROUGH: return "Pass-through";
     case EXITS_DIRTY: return "Exits-dirty";
@@ -179,15 +144,18 @@ static bool callHasRegMask(MachineInstr &MI) {
 }
 
 /// Insert a vzeroupper instruction before I.
-void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
-                                          MachineBasicBlock &MBB) {
+static void insertVZeroUpper(MachineBasicBlock::iterator I,
+                             MachineBasicBlock &MBB, const TargetInstrInfo *TII,
+                             bool &EverMadeChange) {
   BuildMI(MBB, I, I->getDebugLoc(), TII->get(X86::VZEROUPPER));
   ++NumVZU;
   EverMadeChange = true;
 }
 
 /// Add MBB to the DirtySuccessors list if it hasn't already been added.
-void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
+static void addDirtySuccessor(MachineBasicBlock &MBB,
+                              SmallVectorImpl<BlockState> &BlockStates,
+                              SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors) {
   if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
     DirtySuccessors.push_back(&MBB);
     BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
@@ -196,7 +164,11 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
 
 /// Loop over all of the instructions in the basic block, inserting vzeroupper
 /// instructions before function calls.
-void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
+static void processBasicBlock(MachineBasicBlock &MBB,
+                              SmallVectorImpl<BlockState> &BlockStates,
+                              SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors,
+                              bool IsX86INTR, const TargetInstrInfo *TII,
+                              bool &EverMadeChange) {
   // Start by assuming that the block is PASS_THROUGH which implies no unguarded
   // calls.
   BlockExitState CurState = PASS_THROUGH;
@@ -253,7 +225,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
       // After the inserted VZEROUPPER the state becomes clean again, but
       // other YMM/ZMM may appear before other subsequent calls or even before
       // the end of the BB.
-      insertVZeroUpper(MI, MBB);
+      insertVZeroUpper(MI, MBB, TII, EverMadeChange);
       CurState = EXITS_CLEAN;
     } else if (CurState == PASS_THROUGH) {
       // If this block is currently in pass-through state and we encounter a
@@ -271,24 +243,20 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
 
   if (CurState == EXITS_DIRTY)
     for (MachineBasicBlock *Succ : MBB.successors())
-      addDirtySuccessor(*Succ);
+      addDirtySuccessor(*Succ, BlockStates, DirtySuccessors);
 
   BlockStates[MBB.getNumber()].ExitState = CurState;
 }
 
-/// Loop over all of the basic blocks, inserting vzeroupper instructions before
-/// function calls.
-bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
+static bool issueVZeroUpper(MachineFunction &MF) {
   if (!UseVZeroUpper)
     return false;
 
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
   if (!ST.hasAVX() || !ST.insertVZEROUPPER())
     return false;
-  TII = ST.getInstrInfo();
+
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  EverMadeChange = false;
-  IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
 
   bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
 
@@ -309,20 +277,25 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
   if (!YmmOrZmmUsed)
     return false;
 
-  assert(BlockStates.empty() && DirtySuccessors.empty() &&
+  const TargetInstrInfo *TII = ST.getInstrInfo();
+  bool IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
+  bool EverMadeChange = false;
+  SmallVector<BlockState, 8> BlockStates(MF.getNumBlockIDs());
+  SmallVector<MachineBasicBlock *, 8> DirtySuccessors;
+
+  assert(BlockStates.size() == MF.getNumBlockIDs() && DirtySuccessors.empty() &&
          "X86VZeroUpper state should be clear");
-  BlockStates.resize(MF.getNumBlockIDs());
 
   // Process all blocks. This will compute block exit states, record the first
   // unguarded call in each block, and add successors of dirty blocks to the
   // DirtySuccessors list.
   for (MachineBasicBlock &MBB : MF)
-    processBasicBlock(MBB);
+    processBasicBlock(MBB, BlockStates, DirtySuccessors, IsX86INTR, TII, EverMadeChange);
 
   // If any YMM/ZMM regs are live-in to this function, add the entry block to
   // the DirtySuccessors list
   if (FnHasLiveInYmmOrZmm)
-    addDirtySuccessor(MF.front());
+    addDirtySuccessor(MF.front(), BlockStates, DirtySuccessors);
 
   // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
   // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
@@ -335,7 +308,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
     // MBB is a successor of a dirty block, so its first call needs to be
     // guarded.
     if (BBState.FirstUnguardedCall != MBB.end())
-      insertVZeroUpper(BBState.FirstUnguardedCall, MBB);
+      insertVZeroUpper(BBState.FirstUnguardedCall, MBB, TII, EverMadeChange);
 
     // If this successor was a pass-through block, then it is now dirty. Its
     // successors need to be added to the worklist (if they haven't been
@@ -344,10 +317,19 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
       LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber()
                         << " was Pass-through, is now Dirty-out.\n");
       for (MachineBasicBlock *Succ : MBB.successors())
-        addDirtySuccessor(*Succ);
+        addDirtySuccessor(*Succ, BlockStates, DirtySuccessors);
     }
   }
 
-  BlockStates.clear();
   return EverMadeChange;
 }
+
+bool X86IssueVZeroUpperLegacy::runOnMachineFunction(MachineFunction &MF) {
+  return issueVZeroUpper(MF);
+}
+
+PreservedAnalyses
+X86IssueVZeroUpperPass::run(MachineFunction &MF,
+                            MachineFunctionAnalysisManager &MFAM) {
+  return issueVZeroUpper(MF) ? getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 91156260d2c7c..9bbc057f48506 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -50,6 +50,7 @@ MACHINE_FUNCTION_PASS("x86-flags-copy-lowering", X86FlagsCopyLoweringPass())
 MACHINE_FUNCTION_PASS("x86-fp-stackifier", X86FPStackifierPass())
 MACHINE_FUNCTION_PASS("x86-indirect-branch-tracking", X86IndirectBranchTrackingPass())
 MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this))
+MACHINE_FUNCTION_PASS("x86-issue-vzero-upper", X86IssueVZeroUpperPass())
 MACHINE_FUNCTION_PASS("x86-lower-tile-copy", X86LowerTileCopyPass())
 MACHINE_FUNCTION_PASS("x86-lvi-load", X86LoadValueInjectionLoadHardeningPass())
 MACHINE_FUNCTION_PASS("x86-lvi-ret", X86LoadValueInjectionRetHardeningPass())
@@ -70,6 +71,5 @@ DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix())
 DUMMY_MACHINE_FUNCTION_PASS("x86-global-base-reg", X86GlobalBaseRegPass())
 DUMMY_MACHINE_FUNCTION_PASS("x86-indirect-thunks", X86IndirectThunks())
 DUMMY_MACHINE_FUNCTION_PASS("x86-insert-x87-wait", X86InsertX87WaitPass())
-DUMMY_MACHINE_FUNCTION_PASS("x86-issue-vzero-upper", X86IssueVZeroUpperPass())
 DUMMY_MACHINE_FUNCTION_PASS("x86-pad-short-functions", X86PadShortFunctionsPass())
 #undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 95ad484d04ccd..b27815caabcf6 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -568,7 +568,7 @@ void X86PassConfig::addPreEmitPass() {
 
   addPass(createX86IndirectBranchTrackingLegacyPass());
 
-  addPass(createX86IssueVZeroUpperPass());
+  addPass(createX86IssueVZeroUpperLegacyPass());
 
   if (getOptLevel() != CodeGenOptLevel::None) {
     addPass(createX86FixupBWInstsLegacyPass());

>From 79cff18ed0a9eb52d3ebc73ebb63aa2912b12ce5 Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 04:33:55 +0000
Subject: [PATCH 2/6] test

---
 llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp |  3 +-
 llvm/lib/Target/X86/X86IssueVZeroUpper.cpp    | 77 ++++++++++++-------
 llvm/test/CodeGen/X86/llc-pipeline-npm.ll     |  4 +
 3 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
index 53171564ce1ad..b10028f8323be 100644
--- a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
@@ -173,8 +173,7 @@ void X86CodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
   }
 
   addMachineFunctionPass(X86IndirectBranchTrackingPass(), PMW);
-  // TODO(boomanaiden154): Add X86IssueVZeroUpperPass here once it has been
-  // ported.
+  addMachineFunctionPass(X86IssueVZeroUpperPass(), PMW);
 
   if (getOptLevel() != CodeGenOptLevel::None) {
     addMachineFunctionPass(X86FixupBWInstsPass(), PMW);
diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index 5ad4552fcc565..ca05455a5980d 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -1,4 +1,4 @@
-//===- X86VIssueZeroUpper.cpp - AVX vzeroupper instruction inserter -------===//
+//===- X86IssueVZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -24,11 +24,9 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/Analysis.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
@@ -49,30 +47,52 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
 STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
 
 namespace {
-class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
-public:
-  static char ID;
 
-  X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
+  class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
+  public:
+    static char ID;
+    X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
 
-  StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
+    StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-  MachineFunctionProperties getRequiredProperties() const override {
-    return MachineFunctionProperties().setNoVRegs();
-  }
-};
-
-enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
-
-struct BlockState {
-  BlockExitState ExitState = PASS_THROUGH;
-  bool AddedToDirtySuccessors = false;
-  MachineBasicBlock::iterator FirstUnguardedCall;
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().setNoVRegs();
+    }
+  };
+
+  enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
+
+  // Core algorithm state:
+  // BlockState - Each block is either:
+  //   - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
+  //                   vzeroupper instructions in this block.
+  //   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
+  //                  block that will ensure that YMM/ZMM is clean on exit.
+  //   - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
+  //                  subsequent vzeroupper in the block clears it.
+  //
+  // AddedToDirtySuccessors - This flag is raised when a block is added to the
+  //                          DirtySuccessors list to ensure that it's not
+  //                          added multiple times.
+  //
+  // FirstUnguardedCall - Records the location of the first unguarded call in
+  //                      each basic block that may need to be guarded by a
+  //                      vzeroupper. We won't know whether it actually needs
+  //                      to be guarded until we discover a predecessor that
+  //                      is DIRTY_OUT.
+  struct BlockState {
+    BlockExitState ExitState = PASS_THROUGH;
+    bool AddedToDirtySuccessors = false;
+    MachineBasicBlock::iterator FirstUnguardedCall;
+
+    BlockState() = default;
+  };
+
+  using BlockStateMap = SmallVector<BlockState, 8>;
+  using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
 
-  BlockState() = default;
-};
 } // end anonymous namespace
 
 char X86IssueVZeroUpperLegacy::ID = 0;
@@ -154,8 +174,8 @@ static void insertVZeroUpper(MachineBasicBlock::iterator I,
 
 /// Add MBB to the DirtySuccessors list if it hasn't already been added.
 static void addDirtySuccessor(MachineBasicBlock &MBB,
-                              SmallVectorImpl<BlockState> &BlockStates,
-                              SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors) {
+                              BlockStateMap &BlockStates,
+                              DirtySuccessorsWorkList &DirtySuccessors) {
   if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
     DirtySuccessors.push_back(&MBB);
     BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
@@ -165,8 +185,8 @@ static void addDirtySuccessor(MachineBasicBlock &MBB,
 /// Loop over all of the instructions in the basic block, inserting vzeroupper
 /// instructions before function calls.
 static void processBasicBlock(MachineBasicBlock &MBB,
-                              SmallVectorImpl<BlockState> &BlockStates,
-                              SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors,
+                              BlockStateMap &BlockStates,
+                              DirtySuccessorsWorkList &DirtySuccessors,
                               bool IsX86INTR, const TargetInstrInfo *TII,
                               bool &EverMadeChange) {
   // Start by assuming that the block is PASS_THROUGH which implies no unguarded
@@ -257,7 +277,6 @@ static bool issueVZeroUpper(MachineFunction &MF) {
     return false;
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
-
   bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
 
   // Fast check: if the function doesn't use any ymm/zmm registers, we don't
@@ -280,8 +299,8 @@ static bool issueVZeroUpper(MachineFunction &MF) {
   const TargetInstrInfo *TII = ST.getInstrInfo();
   bool IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
   bool EverMadeChange = false;
-  SmallVector<BlockState, 8> BlockStates(MF.getNumBlockIDs());
-  SmallVector<MachineBasicBlock *, 8> DirtySuccessors;
+  BlockStateMap BlockStates(MF.getNumBlockIDs());
+  DirtySuccessorsWorkList DirtySuccessors;
 
   assert(BlockStates.size() == MF.getNumBlockIDs() && DirtySuccessors.empty() &&
          "X86VZeroUpper state should be clear");
diff --git a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
index 37c76babfc5ff..25e24c33b175a 100644
--- a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
@@ -57,6 +57,7 @@
 ; O0-NEXT: xray-instrumentation
 ; O0-NEXT: patchable-function
 ; O0-NEXT: x86-indirect-branch-tracking
+; O0-NEXT: x86-issue-vzero-upper
 ; O0-NEXT: x86-compress-evex
 ; O0-NEXT: FuncletLayoutPass
 ; O0-NEXT: remove-loads-into-fake-uses
@@ -172,6 +173,7 @@
 ; O2-NEXT: x86-fixup-leas
 ; O2-NEXT: x86-fixup-inst-tuning
 ; O2-NEXT: x86-fixup-inst-tuning
+; O2-NEXT: x86-issue-vzero-upper
 ; O2-NEXT: x86-compress-evex
 ; O2-NEXT: FuncletLayoutPass
 ; O2-NEXT: remove-loads-into-fake-uses
@@ -234,6 +236,7 @@
 ; O0-WINDOWS-NEXT: xray-instrumentation
 ; O0-WINDOWS-NEXT: patchable-function
 ; O0-WINDOWS-NEXT: x86-indirect-branch-tracking
+; O0-WINDOWS-NEXT: x86-issue-vzero-upper
 ; O0-WINDOWS-NEXT: x86-compress-evex
 ; O0-WINDOWS-NEXT: FuncletLayoutPass
 ; O0-WINDOWS-NEXT: remove-loads-into-fake-uses
@@ -352,6 +355,7 @@
 ; O3-WINDOWS-NEXT: x86-fixup-leas
 ; O3-WINDOWS-NEXT: x86-fixup-inst-tuning
 ; O3-WINDOWS-NEXT: x86-fixup-inst-tuning
+; O3-WINDOWS-NEXT: x86-issue-vzero-upper
 ; O3-WINDOWS-NEXT: x86-compress-evex
 ; O3-WINDOWS-NEXT: FuncletLayoutPass
 ; O3-WINDOWS-NEXT: remove-loads-into-fake-uses

>From e0586b39128d07c99a8db631b160aea98bdb4fc2 Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 04:54:06 +0000
Subject: [PATCH 3/6] update test files along with more diffs

---
 llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 5 +++++
 llvm/test/CodeGen/X86/llc-pipeline-npm.ll  | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index ca05455a5980d..58890bc968a45 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -24,9 +24,11 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Analysis.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
@@ -268,6 +270,8 @@ static void processBasicBlock(MachineBasicBlock &MBB,
   BlockStates[MBB.getNumber()].ExitState = CurState;
 }
 
+/// Loop over all of the basic blocks, inserting vzeroupper instructions before
+/// function calls.
 static bool issueVZeroUpper(MachineFunction &MF) {
   if (!UseVZeroUpper)
     return false;
@@ -277,6 +281,7 @@ static bool issueVZeroUpper(MachineFunction &MF) {
     return false;
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
+
   bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
 
   // Fast check: if the function doesn't use any ymm/zmm registers, we don't
diff --git a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
index 25e24c33b175a..513d65ab1a4b6 100644
--- a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll
@@ -169,11 +169,11 @@
 ; O2-NEXT: patchable-function
 ; O2-NEXT: BreakFalseDepsPass
 ; O2-NEXT: x86-indirect-branch-tracking
+; O2-NEXT: x86-issue-vzero-upper
 ; O2-NEXT: x86-fixup-bw-insts
 ; O2-NEXT: x86-fixup-leas
 ; O2-NEXT: x86-fixup-inst-tuning
 ; O2-NEXT: x86-fixup-inst-tuning
-; O2-NEXT: x86-issue-vzero-upper
 ; O2-NEXT: x86-compress-evex
 ; O2-NEXT: FuncletLayoutPass
 ; O2-NEXT: remove-loads-into-fake-uses
@@ -351,11 +351,11 @@
 ; O3-WINDOWS-NEXT: patchable-function
 ; O3-WINDOWS-NEXT: BreakFalseDepsPass
 ; O3-WINDOWS-NEXT: x86-indirect-branch-tracking
+; O3-WINDOWS-NEXT: x86-issue-vzero-upper
 ; O3-WINDOWS-NEXT: x86-fixup-bw-insts
 ; O3-WINDOWS-NEXT: x86-fixup-leas
 ; O3-WINDOWS-NEXT: x86-fixup-inst-tuning
 ; O3-WINDOWS-NEXT: x86-fixup-inst-tuning
-; O3-WINDOWS-NEXT: x86-issue-vzero-upper
 ; O3-WINDOWS-NEXT: x86-compress-evex
 ; O3-WINDOWS-NEXT: FuncletLayoutPass
 ; O3-WINDOWS-NEXT: remove-loads-into-fake-uses

>From f250815715e82e1d7f7484aa8861652ceacdab5e Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 04:59:49 +0000
Subject: [PATCH 4/6] undo namespace usage

---
 llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index 58890bc968a45..ff60522b847b4 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -91,10 +91,6 @@ namespace {
 
     BlockState() = default;
   };
-
-  using BlockStateMap = SmallVector<BlockState, 8>;
-  using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
-
 } // end anonymous namespace
 
 char X86IssueVZeroUpperLegacy::ID = 0;
@@ -176,8 +172,8 @@ static void insertVZeroUpper(MachineBasicBlock::iterator I,
 
 /// Add MBB to the DirtySuccessors list if it hasn't already been added.
 static void addDirtySuccessor(MachineBasicBlock &MBB,
-                              BlockStateMap &BlockStates,
-                              DirtySuccessorsWorkList &DirtySuccessors) {
+                              SmallVectorImpl<BlockState> &BlockStates,
+                              SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors) {
   if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
     DirtySuccessors.push_back(&MBB);
     BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
@@ -187,8 +183,8 @@ static void addDirtySuccessor(MachineBasicBlock &MBB,
 /// Loop over all of the instructions in the basic block, inserting vzeroupper
 /// instructions before function calls.
 static void processBasicBlock(MachineBasicBlock &MBB,
-                              BlockStateMap &BlockStates,
-                              DirtySuccessorsWorkList &DirtySuccessors,
+                              SmallVectorImpl<BlockState> &BlockStates,
+                              SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors,
                               bool IsX86INTR, const TargetInstrInfo *TII,
                               bool &EverMadeChange) {
   // Start by assuming that the block is PASS_THROUGH which implies no unguarded
@@ -304,8 +300,8 @@ static bool issueVZeroUpper(MachineFunction &MF) {
   const TargetInstrInfo *TII = ST.getInstrInfo();
   bool IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
   bool EverMadeChange = false;
-  BlockStateMap BlockStates(MF.getNumBlockIDs());
-  DirtySuccessorsWorkList DirtySuccessors;
+  SmallVector<BlockState, 8> BlockStates(MF.getNumBlockIDs());
+  SmallVector<MachineBasicBlock *, 8> DirtySuccessors;
 
   assert(BlockStates.size() == MF.getNumBlockIDs() && DirtySuccessors.empty() &&
          "X86VZeroUpper state should be clear");

>From f48711125078dc880ffd49f8da04da716b9df721 Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 05:28:06 +0000
Subject: [PATCH 5/6] undo undo namespace usage

---
 llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index ff60522b847b4..5780ac373619a 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -91,6 +91,9 @@ namespace {
 
     BlockState() = default;
   };
+
+  using BlockStateMap = SmallVector<BlockState, 8>;
+  using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
 } // end anonymous namespace
 
 char X86IssueVZeroUpperLegacy::ID = 0;
@@ -172,8 +175,8 @@ static void insertVZeroUpper(MachineBasicBlock::iterator I,
 
 /// Add MBB to the DirtySuccessors list if it hasn't already been added.
 static void addDirtySuccessor(MachineBasicBlock &MBB,
-                              SmallVectorImpl<BlockState> &BlockStates,
-                              SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors) {
+                              BlockStateMap &BlockStates,
+                              DirtySuccessorsWorkList &DirtySuccessors) {
   if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
     DirtySuccessors.push_back(&MBB);
     BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
@@ -183,8 +186,8 @@ static void addDirtySuccessor(MachineBasicBlock &MBB,
 /// Loop over all of the instructions in the basic block, inserting vzeroupper
 /// instructions before function calls.
 static void processBasicBlock(MachineBasicBlock &MBB,
-                              SmallVectorImpl<BlockState> &BlockStates,
-                              SmallVectorImpl<MachineBasicBlock *> &DirtySuccessors,
+                              BlockStateMap &BlockStates,
+                              DirtySuccessorsWorkList &DirtySuccessors,
                               bool IsX86INTR, const TargetInstrInfo *TII,
                               bool &EverMadeChange) {
   // Start by assuming that the block is PASS_THROUGH which implies no unguarded
@@ -300,8 +303,8 @@ static bool issueVZeroUpper(MachineFunction &MF) {
   const TargetInstrInfo *TII = ST.getInstrInfo();
   bool IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
   bool EverMadeChange = false;
-  SmallVector<BlockState, 8> BlockStates(MF.getNumBlockIDs());
-  SmallVector<MachineBasicBlock *, 8> DirtySuccessors;
+  BlockStateMap BlockStates(MF.getNumBlockIDs());
+  DirtySuccessorsWorkList DirtySuccessors;
 
   assert(BlockStates.size() == MF.getNumBlockIDs() && DirtySuccessors.empty() &&
          "X86VZeroUpper state should be clear");

>From 45958e776633c80a47074f574771bbdefe438e03 Mon Sep 17 00:00:00 2001
From: Kyungtak Woo <kevinwkt at google.com>
Date: Wed, 11 Feb 2026 05:51:25 +0000
Subject: [PATCH 6/6] clang fmt

---
 llvm/lib/Target/X86/X86IssueVZeroUpper.cpp | 109 +++++++++++----------
 1 file changed, 58 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
index 5780ac373619a..26b91516c0e41 100644
--- a/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86IssueVZeroUpper.cpp
@@ -1,4 +1,5 @@
-//===- X86IssueVZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
+//===- X86IssueVZeroUpper.cpp - AVX vzeroupper instruction inserter
+//------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -42,58 +43,58 @@ using namespace llvm;
 #define DEBUG_TYPE "x86-issue-vzero-upper"
 
 static cl::opt<bool>
-UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
-  cl::desc("Minimize AVX to SSE transition penalty"),
-  cl::init(true));
+    UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
+                  cl::desc("Minimize AVX to SSE transition penalty"),
+                  cl::init(true));
 
 STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
 
 namespace {
 
-  class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
-  public:
-    static char ID;
-    X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
+class X86IssueVZeroUpperLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+  X86IssueVZeroUpperLegacy() : MachineFunctionPass(ID) {}
 
-    StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
+  StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
 
-    bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
-    MachineFunctionProperties getRequiredProperties() const override {
-      return MachineFunctionProperties().setNoVRegs();
-    }
-  };
-
-  enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
-
-  // Core algorithm state:
-  // BlockState - Each block is either:
-  //   - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
-  //                   vzeroupper instructions in this block.
-  //   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
-  //                  block that will ensure that YMM/ZMM is clean on exit.
-  //   - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
-  //                  subsequent vzeroupper in the block clears it.
-  //
-  // AddedToDirtySuccessors - This flag is raised when a block is added to the
-  //                          DirtySuccessors list to ensure that it's not
-  //                          added multiple times.
-  //
-  // FirstUnguardedCall - Records the location of the first unguarded call in
-  //                      each basic block that may need to be guarded by a
-  //                      vzeroupper. We won't know whether it actually needs
-  //                      to be guarded until we discover a predecessor that
-  //                      is DIRTY_OUT.
-  struct BlockState {
-    BlockExitState ExitState = PASS_THROUGH;
-    bool AddedToDirtySuccessors = false;
-    MachineBasicBlock::iterator FirstUnguardedCall;
-
-    BlockState() = default;
-  };
-
-  using BlockStateMap = SmallVector<BlockState, 8>;
-  using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().setNoVRegs();
+  }
+};
+
+enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
+
+// Core algorithm state:
+// BlockState - Each block is either:
+//   - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
+//                   vzeroupper instructions in this block.
+//   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
+//                  block that will ensure that YMM/ZMM is clean on exit.
+//   - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
+//                  subsequent vzeroupper in the block clears it.
+//
+// AddedToDirtySuccessors - This flag is raised when a block is added to the
+//                          DirtySuccessors list to ensure that it's not
+//                          added multiple times.
+//
+// FirstUnguardedCall - Records the location of the first unguarded call in
+//                      each basic block that may need to be guarded by a
+//                      vzeroupper. We won't know whether it actually needs
+//                      to be guarded until we discover a predecessor that
+//                      is EXITS_DIRTY.
+struct BlockState {
+  BlockExitState ExitState = PASS_THROUGH;
+  bool AddedToDirtySuccessors = false;
+  MachineBasicBlock::iterator FirstUnguardedCall;
+
+  BlockState() = default;
+};
+
+using BlockStateMap = SmallVector<BlockState, 8>;
+using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
 } // end anonymous namespace
 
 char X86IssueVZeroUpperLegacy::ID = 0;
@@ -103,11 +104,14 @@ FunctionPass *llvm::createX86IssueVZeroUpperLegacyPass() {
 }
 
 #ifndef NDEBUG
-static const char* getBlockExitStateName(BlockExitState ST) {
+static const char *getBlockExitStateName(BlockExitState ST) {
   switch (ST) {
-    case PASS_THROUGH: return "Pass-through";
-    case EXITS_DIRTY: return "Exits-dirty";
-    case EXITS_CLEAN: return "Exits-clean";
+  case PASS_THROUGH:
+    return "Pass-through";
+  case EXITS_DIRTY:
+    return "Exits-dirty";
+  case EXITS_CLEAN:
+    return "Exits-clean";
   }
   llvm_unreachable("Invalid block exit state.");
 }
@@ -313,7 +317,8 @@ static bool issueVZeroUpper(MachineFunction &MF) {
   // unguarded call in each block, and add successors of dirty blocks to the
   // DirtySuccessors list.
   for (MachineBasicBlock &MBB : MF)
-    processBasicBlock(MBB, BlockStates, DirtySuccessors, IsX86INTR, TII, EverMadeChange);
+    processBasicBlock(MBB, BlockStates, DirtySuccessors, IsX86INTR, TII,
+                      EverMadeChange);
 
   // If any YMM/ZMM regs are live-in to this function, add the entry block to
   // the DirtySuccessors list
@@ -354,5 +359,7 @@ bool X86IssueVZeroUpperLegacy::runOnMachineFunction(MachineFunction &MF) {
 PreservedAnalyses
 X86IssueVZeroUpperPass::run(MachineFunction &MF,
                             MachineFunctionAnalysisManager &MFAM) {
-  return issueVZeroUpper(MF) ? getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>() : PreservedAnalyses::all();
+  return issueVZeroUpper(MF) ? getMachineFunctionPassPreservedAnalyses()
+                                   .preserveSet<CFGAnalyses>()
+                             : PreservedAnalyses::all();
 }



More information about the llvm-commits mailing list