[llvm] Revert "TLS loads opimization (hoist)" (PR #114740)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 05:33:31 PST 2024
https://github.com/abhishek-kaushik22 updated https://github.com/llvm/llvm-project/pull/114740
>From 2f8aeb88ad85eedf8d2152bbdcd3e5ccc9d87e8e Mon Sep 17 00:00:00 2001
From: abhishek-kaushik22 <abhishek.kaushik at intel.com>
Date: Mon, 4 Nov 2024 16:04:44 +0530
Subject: [PATCH 1/3] Revert "TLS loads opimization (hoist)"
This reverts commit c31014322c0b5ae596da129cbb844fb2198b4ef4.
---
llvm/docs/LangRef.rst | 5 -
llvm/include/llvm/InitializePasses.h | 1 -
llvm/include/llvm/LinkAllPasses.h | 1 -
llvm/include/llvm/Transforms/Scalar.h | 6 -
.../llvm/Transforms/Scalar/TLSVariableHoist.h | 131 -------
llvm/lib/CodeGen/TargetPassConfig.cpp | 3 -
llvm/lib/Passes/PassBuilder.cpp | 1 -
llvm/lib/Passes/PassRegistry.def | 1 -
llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 -
llvm/lib/Transforms/Scalar/Scalar.cpp | 1 -
.../Transforms/Scalar/TLSVariableHoist.cpp | 293 ---------------
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 2 -
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 7 -
llvm/test/CodeGen/ARM/O3-pipeline.ll | 1 -
llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 1 -
llvm/test/CodeGen/X86/opt-pipeline.ll | 2 -
llvm/test/CodeGen/X86/tls-loads-control.ll | 248 ------------
llvm/test/CodeGen/X86/tls-loads-control2.ll | 50 ---
llvm/test/CodeGen/X86/tls-loads-control3.ll | 354 ------------------
llvm/tools/llc/llc.cpp | 1 -
20 files changed, 1110 deletions(-)
delete mode 100644 llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h
delete mode 100644 llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
delete mode 100644 llvm/test/CodeGen/X86/tls-loads-control.ll
delete mode 100644 llvm/test/CodeGen/X86/tls-loads-control2.ll
delete mode 100644 llvm/test/CodeGen/X86/tls-loads-control3.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 566d0d4e4e81a3..e7829a511b8159 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2495,11 +2495,6 @@ example:
function with a tail call. The prototype of a thunk should not be used for
optimization purposes. The caller is expected to cast the thunk prototype to
match the thunk target prototype.
-
-``"tls-load-hoist"``
- This attribute indicates that the function will try to reduce redundant
- tls address calculation by hoisting tls variable.
-
``uwtable[(sync|async)]``
This attribute indicates that the ABI being targeted requires that
an unwind table entry be produced for this function even if we can
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 1374880b6a716b..43a435f9c65b7a 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -303,7 +303,6 @@ void initializeTailDuplicatePass(PassRegistry &);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry &);
void initializeTargetPassConfigPass(PassRegistry &);
void initializeTargetTransformInfoWrapperPassPass(PassRegistry &);
-void initializeTLSVariableHoistLegacyPassPass(PassRegistry &);
void initializeTwoAddressInstructionLegacyPassPass(PassRegistry &);
void initializeTypeBasedAAWrapperPassPass(PassRegistry &);
void initializeTypePromotionLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 92b59a66567c95..28c26594d7ecae 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -112,7 +112,6 @@ struct ForcePassLinking {
(void)llvm::createSROAPass();
(void)llvm::createSingleLoopExtractorPass();
(void)llvm::createTailCallEliminationPass();
- (void)llvm::createTLSVariableHoistPass();
(void)llvm::createConstantHoistingPass();
(void)llvm::createCodeGenPrepareLegacyPass();
(void)llvm::createPostInlineEntryExitInstrumenterPass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 17f4327eb3e1ab..fc772a7639c473 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -151,12 +151,6 @@ Pass *createMergeICmpsLegacyPass();
FunctionPass *createInferAddressSpacesPass(unsigned AddressSpace = ~0u);
extern char &InferAddressSpacesID;
-//===----------------------------------------------------------------------===//
-//
-// TLSVariableHoist - This pass reduce duplicated TLS address call.
-//
-FunctionPass *createTLSVariableHoistPass();
-
//===----------------------------------------------------------------------===//
//
// PartiallyInlineLibCalls - Tries to inline the fast path of library
diff --git a/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h b/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h
deleted file mode 100644
index 2a1b02b40eebff..00000000000000
--- a/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h
+++ /dev/null
@@ -1,131 +0,0 @@
-//==- TLSVariableHoist.h ------ Remove Redundant TLS Loads -------*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass identifies/eliminates Redundant TLS Loads if related option is set.
-// For example:
-// static __thread int x;
-// int g();
-// int f(int c) {
-// int *px = &x;
-// while (c--)
-// *px += g();
-// return *px;
-// }
-//
-// will generate Redundant TLS Loads by compiling it with
-// clang++ -fPIC -ftls-model=global-dynamic -O2 -S
-//
-// .LBB0_2: # %while.body
-// # =>This Inner Loop Header: Depth=1
-// callq _Z1gv@PLT
-// movl %eax, %ebp
-// leaq _ZL1x@TLSLD(%rip), %rdi
-// callq __tls_get_addr@PLT
-// addl _ZL1x@DTPOFF(%rax), %ebp
-// movl %ebp, _ZL1x@DTPOFF(%rax)
-// addl $-1, %ebx
-// jne .LBB0_2
-// jmp .LBB0_3
-// .LBB0_4: # %entry.while.end_crit_edge
-// leaq _ZL1x@TLSLD(%rip), %rdi
-// callq __tls_get_addr@PLT
-// movl _ZL1x@DTPOFF(%rax), %ebp
-//
-// The Redundant TLS Loads will hurt the performance, especially in loops.
-// So we try to eliminate/move them if required by customers, let it be:
-//
-// # %bb.0: # %entry
-// ...
-// movl %edi, %ebx
-// leaq _ZL1x@TLSLD(%rip), %rdi
-// callq __tls_get_addr@PLT
-// leaq _ZL1x@DTPOFF(%rax), %r14
-// testl %ebx, %ebx
-// je .LBB0_1
-// .LBB0_2: # %while.body
-// # =>This Inner Loop Header: Depth=1
-// callq _Z1gv@PLT
-// addl (%r14), %eax
-// movl %eax, (%r14)
-// addl $-1, %ebx
-// jne .LBB0_2
-// jmp .LBB0_3
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
-#define LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
-
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-
-class BasicBlock;
-class DominatorTree;
-class Function;
-class GlobalVariable;
-class Instruction;
-
-/// A private "module" namespace for types and utilities used by
-/// TLSVariableHoist. These are implementation details and should
-/// not be used by clients.
-namespace tlshoist {
-
-/// Keeps track of the user of a TLS variable and the operand index
-/// where the variable is used.
-struct TLSUser {
- Instruction *Inst;
- unsigned OpndIdx;
-
- TLSUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) {}
-};
-
-/// Keeps track of a TLS variable candidate and its users.
-struct TLSCandidate {
- SmallVector<TLSUser, 8> Users;
-
- /// Add the user to the use list and update the cost.
- void addUser(Instruction *Inst, unsigned Idx) {
- Users.push_back(TLSUser(Inst, Idx));
- }
-};
-
-} // end namespace tlshoist
-
-class TLSVariableHoistPass : public PassInfoMixin<TLSVariableHoistPass> {
-public:
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
- // Glue for old PM.
- bool runImpl(Function &F, DominatorTree &DT, LoopInfo &LI);
-
-private:
- DominatorTree *DT;
- LoopInfo *LI;
-
- /// Keeps track of TLS variable candidates found in the function.
- using TLSCandMapType = MapVector<GlobalVariable *, tlshoist::TLSCandidate>;
- TLSCandMapType TLSCandMap;
-
- void collectTLSCandidates(Function &Fn);
- void collectTLSCandidate(Instruction *Inst);
- Instruction *getNearestLoopDomInst(BasicBlock *BB, Loop *L);
- Instruction *getDomInst(Instruction *I1, Instruction *I2);
- BasicBlock::iterator findInsertPos(Function &Fn, GlobalVariable *GV,
- BasicBlock *&PosBB);
- Instruction *genBitCastInst(Function &Fn, GlobalVariable *GV);
- bool tryReplaceTLSCandidates(Function &Fn);
- bool tryReplaceTLSCandidate(Function &Fn, GlobalVariable *GV);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 02c3a852697580..e2b6aadbb24fb0 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -881,9 +881,6 @@ void TargetPassConfig::addIRPasses() {
if (!DisableExpandReductions)
addPass(createExpandReductionsPass());
- if (getOptLevel() != CodeGenOptLevel::None)
- addPass(createTLSVariableHoistPass());
-
// Convert conditional moves to conditional jumps when profitable.
if (getOptLevel() != CodeGenOptLevel::None && !DisableSelectOptimize)
addPass(createSelectOptimizePass());
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 60ab33bee704c1..abf464825cbd00 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -287,7 +287,6 @@
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
-#include "llvm/Transforms/Scalar/TLSVariableHoist.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 549c1359b5852c..b6f9208fbad0fb 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -468,7 +468,6 @@ FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
FUNCTION_PASS("stack-protector", StackProtectorPass(TM))
FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
-FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
FUNCTION_PASS("trigger-crash-function", TriggerCrashFunctionPass())
FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 939a1457239567..84a5b02043d012 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -78,7 +78,6 @@ add_llvm_component_library(LLVMScalarOpts
StraightLineStrengthReduce.cpp
StructurizeCFG.cpp
TailRecursionElimination.cpp
- TLSVariableHoist.cpp
WarnMissedTransforms.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index fa6e671830d962..c7e4a3e824700e 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -44,7 +44,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeStructurizeCFGLegacyPassPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
- initializeTLSVariableHoistLegacyPassPass(Registry);
initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReduceLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp b/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
deleted file mode 100644
index 58ea5b68d5488b..00000000000000
--- a/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-//===- TLSVariableHoist.cpp -------- Remove Redundant TLS Loads ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass identifies/eliminate Redundant TLS Loads if related option is set.
-// The example: Please refer to the comment at the head of TLSVariableHoist.h.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/TLSVariableHoist.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <utility>
-
-using namespace llvm;
-using namespace tlshoist;
-
-#define DEBUG_TYPE "tlshoist"
-
-static cl::opt<bool> TLSLoadHoist(
- "tls-load-hoist", cl::init(false), cl::Hidden,
- cl::desc("hoist the TLS loads in PIC model to eliminate redundant "
- "TLS address calculation."));
-
-namespace {
-
-/// The TLS Variable hoist pass.
-class TLSVariableHoistLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
-
- TLSVariableHoistLegacyPass() : FunctionPass(ID) {
- initializeTLSVariableHoistLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &Fn) override;
-
- StringRef getPassName() const override { return "TLS Variable Hoist"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- }
-
-private:
- TLSVariableHoistPass Impl;
-};
-
-} // end anonymous namespace
-
-char TLSVariableHoistLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(TLSVariableHoistLegacyPass, "tlshoist",
- "TLS Variable Hoist", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(TLSVariableHoistLegacyPass, "tlshoist",
- "TLS Variable Hoist", false, false)
-
-FunctionPass *llvm::createTLSVariableHoistPass() {
- return new TLSVariableHoistLegacyPass();
-}
-
-/// Perform the TLS Variable Hoist optimization for the given function.
-bool TLSVariableHoistLegacyPass::runOnFunction(Function &Fn) {
- if (skipFunction(Fn))
- return false;
-
- LLVM_DEBUG(dbgs() << "********** Begin TLS Variable Hoist **********\n");
- LLVM_DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
-
- bool MadeChange =
- Impl.runImpl(Fn, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- getAnalysis<LoopInfoWrapperPass>().getLoopInfo());
-
- if (MadeChange) {
- LLVM_DEBUG(dbgs() << "********** Function after TLS Variable Hoist: "
- << Fn.getName() << '\n');
- LLVM_DEBUG(dbgs() << Fn);
- }
- LLVM_DEBUG(dbgs() << "********** End TLS Variable Hoist **********\n");
-
- return MadeChange;
-}
-
-void TLSVariableHoistPass::collectTLSCandidate(Instruction *Inst) {
- // Skip all cast instructions. They are visited indirectly later on.
- if (Inst->isCast())
- return;
-
- // Scan all operands.
- for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
- auto *GV = dyn_cast<GlobalVariable>(Inst->getOperand(Idx));
- if (!GV || !GV->isThreadLocal())
- continue;
-
- // Add Candidate to TLSCandMap (GV --> Candidate).
- TLSCandMap[GV].addUser(Inst, Idx);
- }
-}
-
-void TLSVariableHoistPass::collectTLSCandidates(Function &Fn) {
- // First, quickly check if there is TLS Variable.
- Module *M = Fn.getParent();
-
- bool HasTLS = llvm::any_of(
- M->globals(), [](GlobalVariable &GV) { return GV.isThreadLocal(); });
-
- // If non, directly return.
- if (!HasTLS)
- return;
-
- TLSCandMap.clear();
-
- // Then, collect TLS Variable info.
- for (BasicBlock &BB : Fn) {
- // Ignore unreachable basic blocks.
- if (!DT->isReachableFromEntry(&BB))
- continue;
-
- for (Instruction &Inst : BB)
- collectTLSCandidate(&Inst);
- }
-}
-
-static bool oneUseOutsideLoop(tlshoist::TLSCandidate &Cand, LoopInfo *LI) {
- if (Cand.Users.size() != 1)
- return false;
-
- BasicBlock *BB = Cand.Users[0].Inst->getParent();
- if (LI->getLoopFor(BB))
- return false;
-
- return true;
-}
-
-Instruction *TLSVariableHoistPass::getNearestLoopDomInst(BasicBlock *BB,
- Loop *L) {
- assert(L && "Unexcepted Loop status!");
-
- // Get the outermost loop.
- while (Loop *Parent = L->getParentLoop())
- L = Parent;
-
- BasicBlock *PreHeader = L->getLoopPreheader();
-
- // There is unique predecessor outside the loop.
- if (PreHeader)
- return PreHeader->getTerminator();
-
- BasicBlock *Header = L->getHeader();
- BasicBlock *Dom = Header;
- for (BasicBlock *PredBB : predecessors(Header))
- Dom = DT->findNearestCommonDominator(Dom, PredBB);
-
- assert(Dom && "Not find dominator BB!");
- Instruction *Term = Dom->getTerminator();
-
- return Term;
-}
-
-Instruction *TLSVariableHoistPass::getDomInst(Instruction *I1,
- Instruction *I2) {
- if (!I1)
- return I2;
- return DT->findNearestCommonDominator(I1, I2);
-}
-
-BasicBlock::iterator TLSVariableHoistPass::findInsertPos(Function &Fn,
- GlobalVariable *GV,
- BasicBlock *&PosBB) {
- tlshoist::TLSCandidate &Cand = TLSCandMap[GV];
-
- // We should hoist the TLS use out of loop, so choose its nearest instruction
- // which dominate the loop and the outside loops (if exist).
- Instruction *LastPos = nullptr;
- for (auto &User : Cand.Users) {
- BasicBlock *BB = User.Inst->getParent();
- Instruction *Pos = User.Inst;
- if (Loop *L = LI->getLoopFor(BB)) {
- Pos = getNearestLoopDomInst(BB, L);
- assert(Pos && "Not find insert position out of loop!");
- }
- Pos = getDomInst(LastPos, Pos);
- LastPos = Pos;
- }
-
- assert(LastPos && "Unexpected insert position!");
- BasicBlock *Parent = LastPos->getParent();
- PosBB = Parent;
- return LastPos->getIterator();
-}
-
-// Generate a bitcast (no type change) to replace the uses of TLS Candidate.
-Instruction *TLSVariableHoistPass::genBitCastInst(Function &Fn,
- GlobalVariable *GV) {
- BasicBlock *PosBB = &Fn.getEntryBlock();
- BasicBlock::iterator Iter = findInsertPos(Fn, GV, PosBB);
- Type *Ty = GV->getType();
- auto *CastInst = new BitCastInst(GV, Ty, "tls_bitcast");
- CastInst->insertInto(PosBB, Iter);
- return CastInst;
-}
-
-bool TLSVariableHoistPass::tryReplaceTLSCandidate(Function &Fn,
- GlobalVariable *GV) {
-
- tlshoist::TLSCandidate &Cand = TLSCandMap[GV];
-
- // If only used 1 time and not in loops, we no need to replace it.
- if (oneUseOutsideLoop(Cand, LI))
- return false;
-
- // Generate a bitcast (no type change)
- auto *CastInst = genBitCastInst(Fn, GV);
-
- // to replace the uses of TLS Candidate
- for (auto &User : Cand.Users)
- User.Inst->setOperand(User.OpndIdx, CastInst);
-
- return true;
-}
-
-bool TLSVariableHoistPass::tryReplaceTLSCandidates(Function &Fn) {
- if (TLSCandMap.empty())
- return false;
-
- bool Replaced = false;
- for (auto &GV2Cand : TLSCandMap) {
- GlobalVariable *GV = GV2Cand.first;
- Replaced |= tryReplaceTLSCandidate(Fn, GV);
- }
-
- return Replaced;
-}
-
-/// Optimize expensive TLS variables in the given function.
-bool TLSVariableHoistPass::runImpl(Function &Fn, DominatorTree &DT,
- LoopInfo &LI) {
- if (Fn.hasOptNone())
- return false;
-
- if (!TLSLoadHoist && !Fn.getAttributes().hasFnAttr("tls-load-hoist"))
- return false;
-
- this->LI = &LI;
- this->DT = &DT;
- assert(this->LI && this->DT && "Unexcepted requirement!");
-
- // Collect all TLS variable candidates.
- collectTLSCandidates(Fn);
-
- bool MadeChange = tryReplaceTLSCandidates(Fn);
-
- return MadeChange;
-}
-
-PreservedAnalyses TLSVariableHoistPass::run(Function &F,
- FunctionAnalysisManager &AM) {
-
- auto &LI = AM.getResult<LoopAnalysis>(F);
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
-
- if (!runImpl(F, DT, LI))
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index fb94c040ae341a..de370ac0696f56 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -63,8 +63,6 @@
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: TLS Variable Hoist
; CHECK-NEXT: Post-Dominator Tree Construction
; CHECK-NEXT: Branch Probability Analysis
; CHECK-NEXT: Block Frequency Analysis
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 646b1264f5deaa..c0a87cf4ceacfa 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -227,8 +227,6 @@
; GCN-O1-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O1-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O1-NEXT: Expand reduction intrinsics
-; GCN-O1-NEXT: Natural Loop Information
-; GCN-O1-NEXT: TLS Variable Hoist
; GCN-O1-NEXT: CallGraph Construction
; GCN-O1-NEXT: Call Graph SCC Pass Manager
; GCN-O1-NEXT: AMDGPU Annotate Kernel Features
@@ -522,8 +520,6 @@
; GCN-O1-OPTS-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O1-OPTS-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O1-OPTS-NEXT: Expand reduction intrinsics
-; GCN-O1-OPTS-NEXT: Natural Loop Information
-; GCN-O1-OPTS-NEXT: TLS Variable Hoist
; GCN-O1-OPTS-NEXT: Early CSE
; GCN-O1-OPTS-NEXT: CallGraph Construction
; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager
@@ -836,8 +832,6 @@
; GCN-O2-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O2-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O2-NEXT: Expand reduction intrinsics
-; GCN-O2-NEXT: Natural Loop Information
-; GCN-O2-NEXT: TLS Variable Hoist
; GCN-O2-NEXT: Early CSE
; GCN-O2-NEXT: CallGraph Construction
; GCN-O2-NEXT: Call Graph SCC Pass Manager
@@ -1159,7 +1153,6 @@
; GCN-O3-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O3-NEXT: Expand reduction intrinsics
; GCN-O3-NEXT: Natural Loop Information
-; GCN-O3-NEXT: TLS Variable Hoist
; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O3-NEXT: Function Alias Analysis Results
; GCN-O3-NEXT: Memory Dependence Analysis
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 819623d3fcc5a3..f6822713022a9c 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -42,7 +42,6 @@
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: TLS Variable Hoist
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index 21bd4bb8502c3d..5b8d6258d9e533 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -66,7 +66,6 @@
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: TLS Variable Hoist
; CHECK-NEXT: CodeGen Prepare
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Exception handling preparation
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 545640b7661691..1bca1b960edda4 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -62,8 +62,6 @@
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
-; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: TLS Variable Hoist
; CHECK-NEXT: Interleaved Access Pass
; CHECK-NEXT: X86 Partial Reduction
; CHECK-NEXT: Expand indirectbr instructions
diff --git a/llvm/test/CodeGen/X86/tls-loads-control.ll b/llvm/test/CodeGen/X86/tls-loads-control.ll
deleted file mode 100644
index 8d9bf61c53fa57..00000000000000
--- a/llvm/test/CodeGen/X86/tls-loads-control.ll
+++ /dev/null
@@ -1,248 +0,0 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic --tls-load-hoist=true --stop-after=tlshoist -o - %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic --stop-after=tlshoist -o - %s | FileCheck %s
-
-; This test come from compiling clang/test/CodeGen/intel/tls_loads.cpp with:
-; (clang tls_loads.cpp -fPIC -ftls-model=global-dynamic -O2 -S -emit-llvm)
-
-; // Variable declaration and definition:
-; thread_local int thl_x;
-; thread_local int thl_x2;
-;
-; struct SS {
-; char thl_c;
-; int num;
-; };
-;
-; int gfunc();
-; int gfunc2(int);
-
-; // First function (@_Z2f1i):
-; int f1(int c) {
-; while (c)
-; c++;
-;
-; int *px = &thl_x;
-; c -= gfunc();
-;
-; while(c++) {
-; c = gfunc();
-; while (c--)
-; *px += gfunc2(thl_x2);
-; }
-; return *px;
-; }
-
-$_ZTW5thl_x = comdat any
-
-$_ZTW6thl_x2 = comdat any
-
-@thl_x = thread_local global i32 0, align 4
-@thl_x2 = thread_local global i32 0, align 4
-@_ZZ2f2iE2st.0 = internal thread_local unnamed_addr global i8 0, align 4
-@_ZZ2f2iE2st.1 = internal thread_local unnamed_addr global i32 0, align 4
-
-; Function Attrs: mustprogress uwtable
-define noundef i32 @_Z2f1i(i32 noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: _Z2f1i
-; CHECK: entry:
-; CHECK-NEXT: %call = tail call noundef i32 @_Z5gfuncv()
-; CHECK-NEXT: %phi.cmp = icmp eq i32 %call, 0
-; CHECK-NEXT: %tls_bitcast1 = bitcast ptr @thl_x to ptr
-; CHECK-NEXT: br i1 %phi.cmp, label %while.end11, label %while.body4.preheader
-
-; CHECK: while.body4.preheader:
-; CHECK-NEXT: %tls_bitcast = bitcast ptr @thl_x2 to ptr
-; CHECK-NEXT: br label %while.body4
-
-; CHECK: while.body4:
-; CHECK-NEXT: %call5 = tail call noundef i32 @_Z5gfuncv()
-; CHECK-NEXT: %tobool7.not18 = icmp eq i32 %call5, 0
-; CHECK-NEXT: br i1 %tobool7.not18, label %while.body4.backedge, label %while.body8.preheader
-
-; CHECK: while.body8.preheader:
-; CHECK-NEXT: br label %while.body8
-
-; CHECK: while.body4.backedge.loopexit:
-; CHECK-NEXT: br label %while.body4.backedge
-
-; CHECK: while.body4.backedge:
-; CHECK-NEXT: br label %while.body4, !llvm.loop !4
-
-; CHECK: while.body8:
-; CHECK-NEXT: %c.addr.219 = phi i32 [ %dec, %while.body8 ], [ %call5, %while.body8.preheader ]
-; CHECK-NEXT: %dec = add i32 %c.addr.219, -1
-; CHECK-NEXT: %0 = load i32, ptr %tls_bitcast, align 4
-; CHECK-NEXT: %call9 = tail call noundef i32 @_Z6gfunc2i(i32 noundef %0)
-; CHECK-NEXT: %1 = load i32, ptr %tls_bitcast1, align 4
-; CHECK-NEXT: %add = add nsw i32 %1, %call9
-; CHECK-NEXT: store i32 %add, ptr %tls_bitcast1, align 4
-; CHECK-NEXT: %tobool7.not = icmp eq i32 %dec, 0
-; CHECK-NEXT: br i1 %tobool7.not, label %while.body4.backedge.loopexit, label %while.body8, !llvm.loop !4
-
-; CHECK: while.end11:
-; CHECK-NEXT: %2 = load i32, ptr %tls_bitcast1, align 4
-; CHECK-NEXT: ret i32 %2
-
-entry:
- %call = tail call noundef i32 @_Z5gfuncv()
- %phi.cmp = icmp eq i32 %call, 0
- br i1 %phi.cmp, label %while.end11, label %while.body4
-
-while.body4: ; preds = %entry, %while.body4.backedge
- %call5 = tail call noundef i32 @_Z5gfuncv()
- %tobool7.not18 = icmp eq i32 %call5, 0
- br i1 %tobool7.not18, label %while.body4.backedge, label %while.body8
-
-while.body4.backedge: ; preds = %while.body8, %while.body4
- br label %while.body4, !llvm.loop !4
-
-while.body8: ; preds = %while.body4, %while.body8
- %c.addr.219 = phi i32 [ %dec, %while.body8 ], [ %call5, %while.body4 ]
- %dec = add nsw i32 %c.addr.219, -1
- %0 = load i32, ptr @thl_x2, align 4
- %call9 = tail call noundef i32 @_Z6gfunc2i(i32 noundef %0)
- %1 = load i32, ptr @thl_x, align 4
- %add = add nsw i32 %1, %call9
- store i32 %add, ptr @thl_x, align 4
- %tobool7.not = icmp eq i32 %dec, 0
- br i1 %tobool7.not, label %while.body4.backedge, label %while.body8, !llvm.loop !4
-
-while.end11: ; preds = %entry
- %2 = load i32, ptr @thl_x, align 4
- ret i32 %2
-}
-
-; // Sencond function (@_Z2f2i):
-; int f2(int c) {
-; thread_local struct SS st;
-; c += gfunc();
-; while (c--) {
-; thl_x += gfunc();
-; st.thl_c += (char)gfunc();
-; st.num += gfunc();
-; }
-; return thl_x;
-; }
-declare noundef i32 @_Z5gfuncv() local_unnamed_addr #1
-
-declare noundef i32 @_Z6gfunc2i(i32 noundef) local_unnamed_addr #1
-
-; Function Attrs: mustprogress uwtable
-define noundef i32 @_Z2f2i(i32 noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: _Z2f2i
-; CHECK: entry:
-; CHECK-NEXT: %call = tail call noundef i32 @_Z5gfuncv()
-; CHECK-NEXT: %add = add nsw i32 %call, %c
-; CHECK-NEXT: %tobool.not12 = icmp eq i32 %add, 0
-; CHECK-NEXT: %tls_bitcast = bitcast ptr @thl_x to ptr
-; CHECK-NEXT: br i1 %tobool.not12, label %while.end, label %while.body.preheader
-
-; CHECK: while.body.preheader:
-; CHECK-NEXT: %tls_bitcast1 = bitcast ptr @_ZZ2f2iE2st.0 to ptr
-; CHECK-NEXT: %tls_bitcast2 = bitcast ptr @_ZZ2f2iE2st.1 to ptr
-; CHECK-NEXT: br label %while.body
-
-; CHECK: while.body:
-; CHECK-NEXT: %c.addr.013 = phi i32 [ %dec, %while.body ], [ %add, %while.body.preheader ]
-; CHECK-NEXT: %dec = add i32 %c.addr.013, -1
-; CHECK-NEXT: %call1 = tail call noundef i32 @_Z5gfuncv()
-; CHECK-NEXT: %0 = load i32, ptr %tls_bitcast, align 4
-; CHECK-NEXT: %add2 = add nsw i32 %0, %call1
-; CHECK-NEXT: store i32 %add2, ptr %tls_bitcast, align 4
-; CHECK-NEXT: %call3 = tail call noundef i32 @_Z5gfuncv()
-; CHECK-NEXT: %1 = load i8, ptr %tls_bitcast1, align 4
-; CHECK-NEXT: %2 = trunc i32 %call3 to i8
-; CHECK-NEXT: %conv7 = add i8 %1, %2
-; CHECK-NEXT: store i8 %conv7, ptr %tls_bitcast1, align 4
-; CHECK-NEXT: %call8 = tail call noundef i32 @_Z5gfuncv()
-; CHECK-NEXT: %3 = load i32, ptr %tls_bitcast2, align 4
-; CHECK-NEXT: %add9 = add nsw i32 %3, %call8
-; CHECK-NEXT: store i32 %add9, ptr %tls_bitcast2, align 4
-; CHECK-NEXT: %tobool.not = icmp eq i32 %dec, 0
-; CHECK-NEXT: br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: br label %while.end
-
-; CHECK: while.end:
-; CHECK-NEXT: %4 = load i32, ptr %tls_bitcast, align 4
-; CHECK-NEXT: ret i32 %4
-entry:
- %call = tail call noundef i32 @_Z5gfuncv()
- %add = add nsw i32 %call, %c
- %tobool.not12 = icmp eq i32 %add, 0
- br i1 %tobool.not12, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %c.addr.013 = phi i32 [ %dec, %while.body ], [ %add, %entry ]
- %dec = add nsw i32 %c.addr.013, -1
- %call1 = tail call noundef i32 @_Z5gfuncv()
- %0 = load i32, ptr @thl_x, align 4
- %add2 = add nsw i32 %0, %call1
- store i32 %add2, ptr @thl_x, align 4
- %call3 = tail call noundef i32 @_Z5gfuncv()
- %1 = load i8, ptr @_ZZ2f2iE2st.0, align 4
- %2 = trunc i32 %call3 to i8
- %conv7 = add i8 %1, %2
- store i8 %conv7, ptr @_ZZ2f2iE2st.0, align 4
- %call8 = tail call noundef i32 @_Z5gfuncv()
- %3 = load i32, ptr @_ZZ2f2iE2st.1, align 4
- %add9 = add nsw i32 %3, %call8
- store i32 %add9, ptr @_ZZ2f2iE2st.1, align 4
- %tobool.not = icmp eq i32 %dec, 0
- br i1 %tobool.not, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- %4 = load i32, ptr @thl_x, align 4
- ret i32 %4
-}
-
-; // Third function (@_Z2f3i):
-; int f3(int c) {
-; int *px = &thl_x;
-; gfunc2(*px);
-; gfunc2(*px);
-; return 1;
-; }
-
-; Function Attrs: mustprogress uwtable
-define noundef i32 @_Z2f3i(i32 noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: _Z2f3i
-; CHECK: entry:
-; CHECK-NEXT: %tls_bitcast = bitcast ptr @thl_x to ptr
-; CHECK-NEXT: %0 = load i32, ptr %tls_bitcast, align 4
-; CHECK-NEXT: %call = tail call noundef i32 @_Z6gfunc2i(i32 noundef %0)
-; CHECK-NEXT: %1 = load i32, ptr %tls_bitcast, align 4
-; CHECK-NEXT: %call1 = tail call noundef i32 @_Z6gfunc2i(i32 noundef %1)
-; CHECK-NEXT: ret i32 1
-entry:
- %0 = load i32, ptr @thl_x, align 4
- %call = tail call noundef i32 @_Z6gfunc2i(i32 noundef %0)
- %1 = load i32, ptr @thl_x, align 4
- %call1 = tail call noundef i32 @_Z6gfunc2i(i32 noundef %1)
- ret i32 1
-}
-
-; Function Attrs: uwtable
-define weak_odr hidden noundef ptr @_ZTW5thl_x() local_unnamed_addr #2 comdat {
- ret ptr @thl_x
-}
-
-; Function Attrs: uwtable
-define weak_odr hidden noundef ptr @_ZTW6thl_x2() local_unnamed_addr #2 comdat {
- ret ptr @thl_x2
-}
-
-attributes #0 = { mustprogress uwtable "tls-load-hoist" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #2 = { uwtable "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-
-!llvm.module.flags = !{!0, !1, !2}
-!llvm.ident = !{!3}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"PIC Level", i32 2}
-!2 = !{i32 7, !"uwtable", i32 2}
-!3 = !{!"clang version 15.0.0"}
-!4 = distinct !{!4, !5}
-!5 = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/CodeGen/X86/tls-loads-control2.ll b/llvm/test/CodeGen/X86/tls-loads-control2.ll
deleted file mode 100644
index fb0f1d2d7398d9..00000000000000
--- a/llvm/test/CodeGen/X86/tls-loads-control2.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: opt -S -mtriple=x86_64-unknown-unknown -passes=tlshoist --relocation-model=pic --tls-load-hoist=true -o - %s | FileCheck %s --check-prefix=HOIST0
-; RUN: opt -S -mtriple=x86_64-unknown-unknown -passes=tlshoist --relocation-model=pic -o - %s | FileCheck %s --check-prefix=HOIST2
-
-$_ZTW5thl_x = comdat any
-
-@thl_x = thread_local global i32 0, align 4
-
-; Function Attrs: mustprogress uwtable
-define i32 @_Z2f1i(i32 %c) local_unnamed_addr #0 {
-entry:
- %0 = load i32, ptr @thl_x, align 4
- %call = tail call i32 @_Z5gfunci(i32 %0)
- %1 = load i32, ptr @thl_x, align 4
- %call1 = tail call i32 @_Z5gfunci(i32 %1)
- ret i32 1
-}
-
-;HOIST0-LABEL: _Z2f1i
-;HOIST0: entry:
-;HOIST0-NEXT: %tls_bitcast = bitcast ptr @thl_x to ptr
-;HOIST0-NEXT: %0 = load i32, ptr %tls_bitcast, align 4
-;HOIST0-NEXT: %call = tail call i32 @_Z5gfunci(i32 %0)
-;HOIST0-NEXT: %1 = load i32, ptr %tls_bitcast, align 4
-;HOIST0-NEXT: %call1 = tail call i32 @_Z5gfunci(i32 %1)
-;HOIST0-NEXT: ret i32 1
-
-;HOIST2-LABEL: _Z2f1i
-;HOIST2: entry:
-;HOIST2-NEXT: %0 = load i32, ptr @thl_x, align 4
-;HOIST2-NEXT: %call = tail call i32 @_Z5gfunci(i32 %0)
-;HOIST2-NEXT: %1 = load i32, ptr @thl_x, align 4
-;HOIST2-NEXT: %call1 = tail call i32 @_Z5gfunci(i32 %1)
-;HOIST2-NEXT: ret i32 1
-
-declare i32 @_Z5gfunci(i32) local_unnamed_addr #1
-
-; Function Attrs: uwtable
-define weak_odr hidden ptr @_ZTW5thl_x() local_unnamed_addr #2 comdat {
- ret ptr @thl_x
-}
-
-attributes #0 = { mustprogress uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #2 = { uwtable "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-
-!llvm.module.flags = !{!0, !1, !2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"PIC Level", i32 2}
-!2 = !{i32 7, !"uwtable", i32 1}
diff --git a/llvm/test/CodeGen/X86/tls-loads-control3.ll b/llvm/test/CodeGen/X86/tls-loads-control3.ll
deleted file mode 100644
index 92dccee296ad77..00000000000000
--- a/llvm/test/CodeGen/X86/tls-loads-control3.ll
+++ /dev/null
@@ -1,354 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic --tls-load-hoist=true -o - %s | FileCheck %s --check-prefix=HOIST0
-; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic -o - %s | FileCheck %s --check-prefix=HOIST2
-
-; This test has no module flag {"tls-load-hoist", i32 0}, so use --tls-load-hoist=x
-; to choose the way of loading thread_local address.
-
-; This test come from compiling clang/test/CodeGen/intel/tls_loads.cpp with:
-; (clang tls_loads.cpp -fPIC -ftls-model=global-dynamic -O2 -S -emit-llvm)
-
-$_ZTW5thl_x = comdat any
-
-$_ZTW6thl_x2 = comdat any
-
-@thl_x = thread_local global i32 0, align 4
-@thl_x2 = thread_local global i32 0, align 4
-@_ZZ2f2iE2st.0 = internal thread_local unnamed_addr global i8 0, align 4
-@_ZZ2f2iE2st.1 = internal thread_local unnamed_addr global i32 0, align 4
-
-; For HOIST0, check call __tls_get_addr@PLT only one time for each thread_local variable.
-; For HOIST2, Check the default way: usually call __tls_get_addr@PLT every time when use thread_local variable.
-
-; Function Attrs: mustprogress uwtable
-define i32 @_Z2f1i(i32 %c) local_unnamed_addr #0 {
-; HOIST0-LABEL: _Z2f1i:
-; HOIST0: # %bb.0: # %entry
-; HOIST0-NEXT: pushq %r15
-; HOIST0-NEXT: .cfi_def_cfa_offset 16
-; HOIST0-NEXT: pushq %r14
-; HOIST0-NEXT: .cfi_def_cfa_offset 24
-; HOIST0-NEXT: pushq %rbx
-; HOIST0-NEXT: .cfi_def_cfa_offset 32
-; HOIST0-NEXT: .cfi_offset %rbx, -32
-; HOIST0-NEXT: .cfi_offset %r14, -24
-; HOIST0-NEXT: .cfi_offset %r15, -16
-; HOIST0-NEXT: movl %edi, %ebx
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: leaq thl_x@TLSGD(%rip), %rdi
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: rex64
-; HOIST0-NEXT: callq __tls_get_addr@PLT
-; HOIST0-NEXT: movq %rax, %r14
-; HOIST0-NEXT: testl %ebx, %ebx
-; HOIST0-NEXT: je .LBB0_4
-; HOIST0-NEXT: # %bb.1: # %while.body.preheader
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: leaq thl_x2@TLSGD(%rip), %rdi
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: rex64
-; HOIST0-NEXT: callq __tls_get_addr@PLT
-; HOIST0-NEXT: movq %rax, %r15
-; HOIST0-NEXT: .p2align 4
-; HOIST0-NEXT: .LBB0_2: # %while.body
-; HOIST0-NEXT: # =>This Inner Loop Header: Depth=1
-; HOIST0-NEXT: movl (%r15), %edi
-; HOIST0-NEXT: callq _Z6gfunc2i@PLT
-; HOIST0-NEXT: addl (%r14), %eax
-; HOIST0-NEXT: movl %eax, (%r14)
-; HOIST0-NEXT: decl %ebx
-; HOIST0-NEXT: jne .LBB0_2
-; HOIST0-NEXT: jmp .LBB0_3
-; HOIST0-NEXT: .LBB0_4: # %entry.while.end_crit_edge
-; HOIST0-NEXT: movl (%r14), %eax
-; HOIST0-NEXT: .LBB0_3: # %while.end
-; HOIST0-NEXT: popq %rbx
-; HOIST0-NEXT: .cfi_def_cfa_offset 24
-; HOIST0-NEXT: popq %r14
-; HOIST0-NEXT: .cfi_def_cfa_offset 16
-; HOIST0-NEXT: popq %r15
-; HOIST0-NEXT: .cfi_def_cfa_offset 8
-; HOIST0-NEXT: retq
-;
-; HOIST2-LABEL: _Z2f1i:
-; HOIST2: # %bb.0: # %entry
-; HOIST2-NEXT: pushq %rbp
-; HOIST2-NEXT: .cfi_def_cfa_offset 16
-; HOIST2-NEXT: pushq %rbx
-; HOIST2-NEXT: .cfi_def_cfa_offset 24
-; HOIST2-NEXT: pushq %rax
-; HOIST2-NEXT: .cfi_def_cfa_offset 32
-; HOIST2-NEXT: .cfi_offset %rbx, -24
-; HOIST2-NEXT: .cfi_offset %rbp, -16
-; HOIST2-NEXT: testl %edi, %edi
-; HOIST2-NEXT: je .LBB0_4
-; HOIST2-NEXT: # %bb.1:
-; HOIST2-NEXT: movl %edi, %ebx
-; HOIST2-NEXT: .p2align 4
-; HOIST2-NEXT: .LBB0_2: # %while.body
-; HOIST2-NEXT: # =>This Inner Loop Header: Depth=1
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: leaq thl_x2@TLSGD(%rip), %rdi
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: rex64
-; HOIST2-NEXT: callq __tls_get_addr@PLT
-; HOIST2-NEXT: movl (%rax), %edi
-; HOIST2-NEXT: callq _Z6gfunc2i@PLT
-; HOIST2-NEXT: movl %eax, %ebp
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: rex64
-; HOIST2-NEXT: callq __tls_get_addr@PLT
-; HOIST2-NEXT: addl (%rax), %ebp
-; HOIST2-NEXT: movl %ebp, (%rax)
-; HOIST2-NEXT: decl %ebx
-; HOIST2-NEXT: jne .LBB0_2
-; HOIST2-NEXT: jmp .LBB0_3
-; HOIST2-NEXT: .LBB0_4: # %entry.while.end_crit_edge
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: rex64
-; HOIST2-NEXT: callq __tls_get_addr@PLT
-; HOIST2-NEXT: movl (%rax), %ebp
-; HOIST2-NEXT: .LBB0_3: # %while.end
-; HOIST2-NEXT: movl %ebp, %eax
-; HOIST2-NEXT: addq $8, %rsp
-; HOIST2-NEXT: .cfi_def_cfa_offset 24
-; HOIST2-NEXT: popq %rbx
-; HOIST2-NEXT: .cfi_def_cfa_offset 16
-; HOIST2-NEXT: popq %rbp
-; HOIST2-NEXT: .cfi_def_cfa_offset 8
-; HOIST2-NEXT: retq
-entry:
- %tobool.not3 = icmp eq i32 %c, 0
- br i1 %tobool.not3, label %entry.while.end_crit_edge, label %while.body
-
-entry.while.end_crit_edge: ; preds = %entry
- %.pre = load i32, ptr @thl_x, align 4
- br label %while.end
-
-while.body: ; preds = %entry, %while.body
- %c.addr.04 = phi i32 [ %dec, %while.body ], [ %c, %entry ]
- %dec = add nsw i32 %c.addr.04, -1
- %0 = load i32, ptr @thl_x2, align 4
- %call = tail call i32 @_Z6gfunc2i(i32 %0)
- %1 = load i32, ptr @thl_x, align 4
- %add = add nsw i32 %1, %call
- store i32 %add, ptr @thl_x, align 4
- %tobool.not = icmp eq i32 %dec, 0
- br i1 %tobool.not, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry.while.end_crit_edge
- %2 = phi i32 [ %.pre, %entry.while.end_crit_edge ], [ %add, %while.body ]
- ret i32 %2
-}
-
-declare i32 @_Z6gfunc2i(i32) local_unnamed_addr #1
-
-; Function Attrs: mustprogress uwtable
-define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 {
-; HOIST0-LABEL: _Z2f2i:
-; HOIST0: # %bb.0: # %entry
-; HOIST0-NEXT: pushq %r15
-; HOIST0-NEXT: .cfi_def_cfa_offset 16
-; HOIST0-NEXT: pushq %r14
-; HOIST0-NEXT: .cfi_def_cfa_offset 24
-; HOIST0-NEXT: pushq %r12
-; HOIST0-NEXT: .cfi_def_cfa_offset 32
-; HOIST0-NEXT: pushq %rbx
-; HOIST0-NEXT: .cfi_def_cfa_offset 40
-; HOIST0-NEXT: pushq %rax
-; HOIST0-NEXT: .cfi_def_cfa_offset 48
-; HOIST0-NEXT: .cfi_offset %rbx, -40
-; HOIST0-NEXT: .cfi_offset %r12, -32
-; HOIST0-NEXT: .cfi_offset %r14, -24
-; HOIST0-NEXT: .cfi_offset %r15, -16
-; HOIST0-NEXT: movl %edi, %ebx
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: leaq thl_x@TLSGD(%rip), %rdi
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: rex64
-; HOIST0-NEXT: callq __tls_get_addr@PLT
-; HOIST0-NEXT: movq %rax, %r14
-; HOIST0-NEXT: testl %ebx, %ebx
-; HOIST0-NEXT: je .LBB1_3
-; HOIST0-NEXT: # %bb.1: # %while.body.preheader
-; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi
-; HOIST0-NEXT: callq __tls_get_addr@PLT
-; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@DTPOFF(%rax), %r15
-; HOIST0-NEXT: leaq _ZZ2f2iE2st.1@DTPOFF(%rax), %r12
-; HOIST0-NEXT: .p2align 4
-; HOIST0-NEXT: .LBB1_2: # %while.body
-; HOIST0-NEXT: # =>This Inner Loop Header: Depth=1
-; HOIST0-NEXT: callq _Z5gfuncv@PLT
-; HOIST0-NEXT: addl %eax, (%r14)
-; HOIST0-NEXT: callq _Z5gfuncv@PLT
-; HOIST0-NEXT: addb %al, (%r15)
-; HOIST0-NEXT: callq _Z5gfuncv@PLT
-; HOIST0-NEXT: addl %eax, (%r12)
-; HOIST0-NEXT: decl %ebx
-; HOIST0-NEXT: jne .LBB1_2
-; HOIST0-NEXT: .LBB1_3: # %while.end
-; HOIST0-NEXT: movl (%r14), %eax
-; HOIST0-NEXT: addq $8, %rsp
-; HOIST0-NEXT: .cfi_def_cfa_offset 40
-; HOIST0-NEXT: popq %rbx
-; HOIST0-NEXT: .cfi_def_cfa_offset 32
-; HOIST0-NEXT: popq %r12
-; HOIST0-NEXT: .cfi_def_cfa_offset 24
-; HOIST0-NEXT: popq %r14
-; HOIST0-NEXT: .cfi_def_cfa_offset 16
-; HOIST0-NEXT: popq %r15
-; HOIST0-NEXT: .cfi_def_cfa_offset 8
-; HOIST0-NEXT: retq
-;
-; HOIST2-LABEL: _Z2f2i:
-; HOIST2: # %bb.0: # %entry
-; HOIST2-NEXT: pushq %rbp
-; HOIST2-NEXT: .cfi_def_cfa_offset 16
-; HOIST2-NEXT: pushq %r14
-; HOIST2-NEXT: .cfi_def_cfa_offset 24
-; HOIST2-NEXT: pushq %rbx
-; HOIST2-NEXT: .cfi_def_cfa_offset 32
-; HOIST2-NEXT: .cfi_offset %rbx, -32
-; HOIST2-NEXT: .cfi_offset %r14, -24
-; HOIST2-NEXT: .cfi_offset %rbp, -16
-; HOIST2-NEXT: testl %edi, %edi
-; HOIST2-NEXT: je .LBB1_3
-; HOIST2-NEXT: # %bb.1: # %while.body.preheader
-; HOIST2-NEXT: movl %edi, %ebx
-; HOIST2-NEXT: .p2align 4
-; HOIST2-NEXT: .LBB1_2: # %while.body
-; HOIST2-NEXT: # =>This Inner Loop Header: Depth=1
-; HOIST2-NEXT: callq _Z5gfuncv@PLT
-; HOIST2-NEXT: movl %eax, %ebp
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: rex64
-; HOIST2-NEXT: callq __tls_get_addr@PLT
-; HOIST2-NEXT: addl %ebp, (%rax)
-; HOIST2-NEXT: callq _Z5gfuncv@PLT
-; HOIST2-NEXT: movl %eax, %ebp
-; HOIST2-NEXT: leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi
-; HOIST2-NEXT: callq __tls_get_addr@PLT
-; HOIST2-NEXT: movq %rax, %r14
-; HOIST2-NEXT: addb %bpl, _ZZ2f2iE2st.0@DTPOFF(%rax)
-; HOIST2-NEXT: callq _Z5gfuncv@PLT
-; HOIST2-NEXT: addl %eax, _ZZ2f2iE2st.1@DTPOFF(%r14)
-; HOIST2-NEXT: decl %ebx
-; HOIST2-NEXT: jne .LBB1_2
-; HOIST2-NEXT: .LBB1_3: # %while.end
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: rex64
-; HOIST2-NEXT: callq __tls_get_addr@PLT
-; HOIST2-NEXT: movl (%rax), %eax
-; HOIST2-NEXT: popq %rbx
-; HOIST2-NEXT: .cfi_def_cfa_offset 24
-; HOIST2-NEXT: popq %r14
-; HOIST2-NEXT: .cfi_def_cfa_offset 16
-; HOIST2-NEXT: popq %rbp
-; HOIST2-NEXT: .cfi_def_cfa_offset 8
-; HOIST2-NEXT: retq
-entry:
- %tobool.not9 = icmp eq i32 %c, 0
- br i1 %tobool.not9, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %c.addr.010 = phi i32 [ %dec, %while.body ], [ %c, %entry ]
- %dec = add nsw i32 %c.addr.010, -1
- %call = tail call i32 @_Z5gfuncv()
- %0 = load i32, ptr @thl_x, align 4
- %add = add nsw i32 %0, %call
- store i32 %add, ptr @thl_x, align 4
- %call1 = tail call i32 @_Z5gfuncv()
- %1 = load i8, ptr @_ZZ2f2iE2st.0, align 4
- %2 = trunc i32 %call1 to i8
- %conv5 = add i8 %1, %2
- store i8 %conv5, ptr @_ZZ2f2iE2st.0, align 4
- %call6 = tail call i32 @_Z5gfuncv()
- %3 = load i32, ptr @_ZZ2f2iE2st.1, align 4
- %add7 = add nsw i32 %3, %call6
- store i32 %add7, ptr @_ZZ2f2iE2st.1, align 4
- %tobool.not = icmp eq i32 %dec, 0
- br i1 %tobool.not, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- %4 = load i32, ptr @thl_x, align 4
- ret i32 %4
-}
-
-declare i32 @_Z5gfuncv() local_unnamed_addr #1
-
-; Function Attrs: mustprogress uwtable
-define i32 @_Z2f3i(i32 %c) local_unnamed_addr #0 {
-; HOIST0-LABEL: _Z2f3i:
-; HOIST0: # %bb.0: # %entry
-; HOIST0-NEXT: pushq %rbx
-; HOIST0-NEXT: .cfi_def_cfa_offset 16
-; HOIST0-NEXT: .cfi_offset %rbx, -16
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: leaq thl_x@TLSGD(%rip), %rdi
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: data16
-; HOIST0-NEXT: rex64
-; HOIST0-NEXT: callq __tls_get_addr@PLT
-; HOIST0-NEXT: movq %rax, %rbx
-; HOIST0-NEXT: movl (%rax), %edi
-; HOIST0-NEXT: callq _Z6gfunc2i@PLT
-; HOIST0-NEXT: movl (%rbx), %edi
-; HOIST0-NEXT: callq _Z6gfunc2i@PLT
-; HOIST0-NEXT: movl $1, %eax
-; HOIST0-NEXT: popq %rbx
-; HOIST0-NEXT: .cfi_def_cfa_offset 8
-; HOIST0-NEXT: retq
-;
-; HOIST2-LABEL: _Z2f3i:
-; HOIST2: # %bb.0: # %entry
-; HOIST2-NEXT: pushq %rbx
-; HOIST2-NEXT: .cfi_def_cfa_offset 16
-; HOIST2-NEXT: .cfi_offset %rbx, -16
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: data16
-; HOIST2-NEXT: rex64
-; HOIST2-NEXT: callq __tls_get_addr@PLT
-; HOIST2-NEXT: movq %rax, %rbx
-; HOIST2-NEXT: movl (%rax), %edi
-; HOIST2-NEXT: callq _Z6gfunc2i@PLT
-; HOIST2-NEXT: movl (%rbx), %edi
-; HOIST2-NEXT: callq _Z6gfunc2i@PLT
-; HOIST2-NEXT: movl $1, %eax
-; HOIST2-NEXT: popq %rbx
-; HOIST2-NEXT: .cfi_def_cfa_offset 8
-; HOIST2-NEXT: retq
-entry:
- %0 = load i32, ptr @thl_x, align 4
- %call = tail call i32 @_Z6gfunc2i(i32 %0)
- %1 = load i32, ptr @thl_x, align 4
- %call1 = tail call i32 @_Z6gfunc2i(i32 %1)
- ret i32 1
-}
-
-attributes #0 = { nounwind mustprogress uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #2 = { uwtable "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-
-!llvm.module.flags = !{!0, !1, !2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"PIC Level", i32 2}
-!2 = !{i32 7, !"uwtable", i32 1}
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 2c1901cdd49d8b..5bf8ac5cf4181c 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -349,7 +349,6 @@ int main(int argc, char **argv) {
initializeHardwareLoopsLegacyPass(*Registry);
initializeTransformUtils(*Registry);
initializeReplaceWithVeclibLegacyPass(*Registry);
- initializeTLSVariableHoistLegacyPassPass(*Registry);
// Initialize debugging passes.
initializeScavengerTestPass(*Registry);
>From 4f2a584b67018e8acef3aa33f4531979b54b3be6 Mon Sep 17 00:00:00 2001
From: abhishek-kaushik22 <abhishek.kaushik at intel.com>
Date: Mon, 4 Nov 2024 17:18:39 +0530
Subject: [PATCH 2/3] Fix pipeline test for LoongArch M68k RISCV
---
llvm/test/CodeGen/LoongArch/opt-pipeline.ll | 1 -
llvm/test/CodeGen/M68k/pipeline.ll | 1 -
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 -
3 files changed, 3 deletions(-)
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 53cdbd18f9b907..da26e9846301a1 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -65,7 +65,6 @@
; LAXX-NEXT: Scalarize Masked Memory Intrinsics
; LAXX-NEXT: Expand reduction intrinsics
; LAXX-NEXT: Natural Loop Information
-; LAXX-NEXT: TLS Variable Hoist
; LAXX-NEXT: Type Promotion
; LAXX-NEXT: CodeGen Prepare
; LAXX-NEXT: Dominator Tree Construction
diff --git a/llvm/test/CodeGen/M68k/pipeline.ll b/llvm/test/CodeGen/M68k/pipeline.ll
index 6dc5310c736610..bc224743e5b707 100644
--- a/llvm/test/CodeGen/M68k/pipeline.ll
+++ b/llvm/test/CodeGen/M68k/pipeline.ll
@@ -36,7 +36,6 @@
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: TLS Variable Hoist
; CHECK-NEXT: CodeGen Prepare
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Exception handling preparation
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index c29f15a15c1503..f2693017d136bb 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -68,7 +68,6 @@
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information
-; CHECK-NEXT: TLS Variable Hoist
; CHECK-NEXT: Type Promotion
; CHECK-NEXT: CodeGen Prepare
; CHECK-NEXT: Dominator Tree Construction
>From 3b73d40535beb5629152c6144efb548df254aff6 Mon Sep 17 00:00:00 2001
From: abhishek-kaushik22 <abhishek.kaushik at intel.com>
Date: Mon, 4 Nov 2024 19:03:04 +0530
Subject: [PATCH 3/3] Update AArch64/O3-pipeline.ll
---
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index de370ac0696f56..96c30c4aec0d17 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -63,6 +63,7 @@
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
+; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Post-Dominator Tree Construction
; CHECK-NEXT: Branch Probability Analysis
; CHECK-NEXT: Block Frequency Analysis
More information about the llvm-commits
mailing list