[llvm] 5af11ec - [Attributor] Determine potentially loaded values through memory
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 6 21:39:26 PST 2022
Author: Johannes Doerfert
Date: 2022-03-06T23:26:37-06:00
New Revision: 5af11ec34b7bae731c9cc0d40597f52345e33a94
URL: https://github.com/llvm/llvm-project/commit/5af11ec34b7bae731c9cc0d40597f52345e33a94
DIFF: https://github.com/llvm/llvm-project/commit/5af11ec34b7bae731c9cc0d40597f52345e33a94.diff
LOG: [Attributor] Determine potentially loaded values through memory
We already look through memory to determine where a value that is stored
might pop up again (potential copies). This patch introduces the other
direction with similar logic: if a value is loaded, we can follow all
the accesses to the pointer (or better, the underlying object) and try
to determine what value might have been stored.
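
As a minimal, hypothetical sketch (not part of the commit or its tests):
in the IR below the only write that can reach the load is the store of 42,
so the new query lets value simplification replace %v, and thereby the
returned value, with 42.

  define i32 @example() {
  entry:
    %a = alloca i32, align 4
    store i32 42, i32* %a, align 4
    %v = load i32, i32* %a, align 4
    ret i32 %v
  }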
Added:
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Transforms/IPO/Attributor.cpp
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
llvm/test/Transforms/Attributor/align.ll
llvm/test/Transforms/Attributor/nodelete.ll
llvm/test/Transforms/Attributor/value-simplify-gpu.ll
llvm/test/Transforms/OpenMP/custom_state_machines.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 7072e949a0703..7af8f38e04646 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -199,9 +199,24 @@ bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
bool &UsedAssumedInformation,
bool Intraprocedural = false);
+/// Collect all potential values \p LI could read into \p PotentialValues. That
+/// is, the only values read by \p LI are assumed to be known and all are in
+/// \p PotentialValues. Dependences onto \p QueryingAA are properly tracked,
+/// \p UsedAssumedInformation will inform the caller if assumed information was
+/// used.
+///
+/// \returns True if the assumed potential values are all in \p PotentialValues,
+///          false if something went wrong and the values could not be
+///          determined.
+bool getPotentiallyLoadedValues(Attributor &A, LoadInst &LI,
+ SmallSetVector<Value *, 4> &PotentialValues,
+ const AbstractAttribute &QueryingAA,
+ bool &UsedAssumedInformation,
+ bool OnlyExact = false);
+
/// Collect all potential values of the one stored by \p SI into
/// \p PotentialCopies. That is, the only copies that were made via the
-/// store are assumed to be known and all in \p PotentialCopies. Dependences
+/// store are assumed to be known and all are in \p PotentialCopies. Dependences
/// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will
/// inform the caller if assumed information was used.
///
@@ -210,7 +225,8 @@ bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
/// determined.
bool getPotentialCopiesOfStoredValue(
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
- const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact = false);
/// Return true if \p IRP is readonly. This will query respective AAs that
/// deduce the information and introduce dependences for \p QueryingAA.
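
To contrast the two directions with a hedged, hypothetical example (not
taken from the test suite): for the store below,
getPotentialCopiesOfStoredValue reports the loads %l1 and %l2 as the
potential copies of the stored value, while the new
getPotentiallyLoadedValues, asked about either load, reports the stored
value i32 7.

  define i32 @copies() {
    %a = alloca i32, align 4
    store i32 7, i32* %a, align 4
    %l1 = load i32, i32* %a, align 4
    %l2 = load i32, i32* %a, align 4
    %r = add i32 %l1, %l2
    ret i32 %r
  }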
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 9f5907a1f131d..41302d747fc61 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -322,22 +322,32 @@ AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
return nullptr;
}
-bool AA::getPotentialCopiesOfStoredValue(
- Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
- const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) {
-
- Value &Ptr = *SI.getPointerOperand();
+template <bool IsLoad, typename Ty>
+static bool
+getPotentialCopiesOfMemoryValue(Attributor &A, Ty &I,
+ SmallSetVector<Value *, 4> &PotentialCopies,
+ const AbstractAttribute &QueryingAA,
+ bool &UsedAssumedInformation, bool OnlyExact) {
+ LLVM_DEBUG(dbgs() << "Trying to determine the potential copies of " << I
+ << " (only exact: " << OnlyExact << ")\n";);
+
+ Value &Ptr = *I.getPointerOperand();
SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI,
+ if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I,
UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs() << "Underlying objects stored into could not be determined\n";);
return false;
}
+ // Containers to remember the pointer infos and new copies while we are not
+ // sure that we can find all of them. If we abort we want to avoid spurious
+ // dependences and potential copies in the provided container.
SmallVector<const AAPointerInfo *> PIs;
SmallVector<Value *> NewCopies;
+ const auto *TLI =
+ A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction());
for (Value *Obj : Objects) {
LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
if (isa<UndefValue>(Obj))
@@ -345,7 +355,7 @@ bool AA::getPotentialCopiesOfStoredValue(
if (isa<ConstantPointerNull>(Obj)) {
// A null pointer access can be undefined but any offset from null may
// be OK. We do not try to optimize the latter.
- if (!NullPointerIsDefined(SI.getFunction(),
+ if (!NullPointerIsDefined(I.getFunction(),
Ptr.getType()->getPointerAddressSpace()) &&
A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) ==
Obj)
@@ -354,8 +364,9 @@ bool AA::getPotentialCopiesOfStoredValue(
dbgs() << "Underlying object is a valid nullptr, giving up.\n";);
return false;
}
+ // TODO: Use assumed noalias return.
if (!isa<AllocaInst>(Obj) && !isa<GlobalVariable>(Obj) &&
- !isNoAliasCall(Obj)) {
+ !(IsLoad ? isAllocationFn(Obj, TLI) : isNoAliasCall(Obj))) {
LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj
<< "\n";);
return false;
@@ -368,23 +379,54 @@ bool AA::getPotentialCopiesOfStoredValue(
return false;
}
+ if (IsLoad) {
+ Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI);
+ if (!InitialValue)
+ return false;
+ NewCopies.push_back(InitialValue);
+ }
+
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
- if (!Acc.isRead())
+ if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead()))
return true;
- auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
- if (!LI) {
- LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
- "instruction not supported yet: "
- << *Acc.getRemoteInst() << "\n";);
+ if (OnlyExact && !IsExact) {
+ LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst()
+ << ", abort!\n");
return false;
}
- NewCopies.push_back(LI);
+ if (IsLoad) {
+ assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
+ if (Acc.isWrittenValueYetUndetermined())
+ return true;
+ if (!Acc.isWrittenValueUnknown()) {
+ NewCopies.push_back(Acc.getWrittenValue());
+ return true;
+ }
+ auto *SI = dyn_cast<StoreInst>(Acc.getRemoteInst());
+ if (!SI) {
+ LLVM_DEBUG(dbgs() << "Underlying object written through a non-store "
+ "instruction not supported yet: "
+ << *Acc.getRemoteInst() << "\n";);
+ return false;
+ }
+ NewCopies.push_back(SI->getValueOperand());
+ } else {
+ assert(isa<StoreInst>(I) && "Expected load or store instruction only!");
+ auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
+ if (!LI && OnlyExact) {
+ LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
+ "instruction not supported yet: "
+ << *Acc.getRemoteInst() << "\n";);
+ return false;
+ }
+ NewCopies.push_back(Acc.getRemoteInst());
+ }
return true;
};
auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj),
DepClassTy::NONE);
- if (!PI.forallInterferingAccesses(A, QueryingAA, SI, CheckAccess)) {
+ if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) {
LLVM_DEBUG(
dbgs()
<< "Failed to verify all interfering accesses for underlying object: "
@@ -394,6 +436,9 @@ bool AA::getPotentialCopiesOfStoredValue(
PIs.push_back(&PI);
}
+  // Only if we were successful in collecting all potential copies do we
+  // record dependences (on AAPointerInfo AAs that are not at a fixpoint
+  // yet). Only then do we modify the given PotentialCopies container.
for (auto *PI : PIs) {
if (!PI->getState().isAtFixpoint())
UsedAssumedInformation = true;
@@ -404,6 +449,23 @@ bool AA::getPotentialCopiesOfStoredValue(
return true;
}
+bool AA::getPotentiallyLoadedValues(Attributor &A, LoadInst &LI,
+ SmallSetVector<Value *, 4> &PotentialValues,
+ const AbstractAttribute &QueryingAA,
+ bool &UsedAssumedInformation,
+ bool OnlyExact) {
+ return getPotentialCopiesOfMemoryValue</* IsLoad */ true>(
+ A, LI, PotentialValues, QueryingAA, UsedAssumedInformation, OnlyExact);
+}
+
+bool AA::getPotentialCopiesOfStoredValue(
+ Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact) {
+ return getPotentialCopiesOfMemoryValue</* IsLoad */ false>(
+ A, SI, PotentialCopies, QueryingAA, UsedAssumedInformation, OnlyExact);
+}
+
static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA,
bool RequireReadNone, bool &IsKnown) {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 4729fc753b4a6..9dbc845bdf4f9 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -400,6 +400,31 @@ static bool genericValueTraversal(
}
}
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ bool UsedAssumedInformation = false;
+ SmallSetVector<Value *, 4> PotentialCopies;
+ if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, QueryingAA,
+ UsedAssumedInformation,
+ /* OnlyExact */ true)) {
+      // Values have to be dynamically unique or we lose the fact that a
+      // single llvm::Value might represent two runtime values (e.g., stack
+      // locations in different recursive calls).
+ bool DynamicallyUnique =
+ llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) {
+ return AA::isDynamicallyUnique(A, QueryingAA, *PC);
+ });
+ if (DynamicallyUnique &&
+ (!Intraprocedural || !CtxI ||
+ llvm::all_of(PotentialCopies, [CtxI](Value *PC) {
+ return AA::isValidInScope(*PC, CtxI->getFunction());
+ }))) {
+ for (auto *PotentialCopy : PotentialCopies)
+ Worklist.push_back({PotentialCopy, CtxI});
+ continue;
+ }
+ }
+ }
+
// Once a leaf is reached we inform the user through the callback.
if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index a5a0fd27898dc..917dd988926d7 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -28,7 +28,7 @@ define internal i32 @deref(i32* %x) nounwind {
; IS__CGSCC_NPM-NEXT: [[X_PRIV:%.*]] = alloca i32, align 4
; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]], align 4
; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_PRIV]], align 4
-; IS__CGSCC_NPM-NEXT: ret i32 [[TMP0]]
+; IS__CGSCC_NPM-NEXT: ret i32 [[TMP2]]
;
entry:
%tmp2 = load i32, i32* %x, align 4
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
index 90437f5876d76..9963189e2200e 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
@@ -104,25 +104,15 @@ define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %
; IS________OPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32
; IS________OPM-NEXT: ret void
;
-; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@promote_avx2
-; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_NPM-NEXT: bb:
-; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32
-; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32
-; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32
-; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32
-; IS__TUNIT_NPM-NEXT: ret void
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@promote_avx2
-; IS__CGSCC_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_NPM-NEXT: bb:
-; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32
-; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32
-; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32
-; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG]], align 32
-; IS__CGSCC_NPM-NEXT: ret void
+; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
+; IS________NPM-LABEL: define {{[^@]+}}@promote_avx2
+; IS________NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
+; IS________NPM-NEXT: bb:
+; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32
+; IS________NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32
+; IS________NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32
+; IS________NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32
+; IS________NPM-NEXT: ret void
;
bb:
%tmp = load <4 x i64>, <4 x i64>* %arg1
@@ -181,8 +171,7 @@ define void @promote(<4 x i64>* %arg) #0 {
; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]]
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32
; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]]
-; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
-; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
+; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG]], align 2
; IS__CGSCC_NPM-NEXT: ret void
;
bb:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index 7a2b796cb3210..f8313bd66cdb6 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -19,25 +19,15 @@ define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal5
; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; IS________OPM-NEXT: ret void
;
-; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__TUNIT_NPM-NEXT: bb:
-; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
-; IS__TUNIT_NPM-NEXT: ret void
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT: bb:
-; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64
-; IS__CGSCC_NPM-NEXT: ret void
+; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
+; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS________NPM-NEXT: bb:
+; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
+; IS________NPM-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
@@ -97,8 +87,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
-; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2
; IS__CGSCC_NPM-NEXT: ret void
;
bb:
@@ -123,25 +112,15 @@ define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal5
; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; IS________OPM-NEXT: ret void
;
-; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
-; IS__TUNIT_NPM-NEXT: bb:
-; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
-; IS__TUNIT_NPM-NEXT: ret void
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT: bb:
-; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64
-; IS__CGSCC_NPM-NEXT: ret void
+; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
+; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
+; IS________NPM-NEXT: bb:
+; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
+; IS________NPM-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
@@ -201,8 +180,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
-; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2
; IS__CGSCC_NPM-NEXT: ret void
;
bb:
@@ -227,25 +205,15 @@ define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal5
; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; IS________OPM-NEXT: ret void
;
-; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] {
-; IS__TUNIT_NPM-NEXT: bb:
-; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
-; IS__TUNIT_NPM-NEXT: ret void
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT: bb:
-; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64
-; IS__CGSCC_NPM-NEXT: ret void
+; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
+; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] {
+; IS________NPM-NEXT: bb:
+; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
+; IS________NPM-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
@@ -305,8 +273,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
-; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2
; IS__CGSCC_NPM-NEXT: ret void
;
bb:
@@ -331,25 +298,15 @@ define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal5
; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; IS________OPM-NEXT: ret void
;
-; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_NPM-NEXT: bb:
-; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
-; IS__TUNIT_NPM-NEXT: ret void
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_NPM-NEXT: bb:
-; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64
-; IS__CGSCC_NPM-NEXT: ret void
+; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
+; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
+; IS________NPM-NEXT: bb:
+; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
+; IS________NPM-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
@@ -409,8 +366,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
-; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2
; IS__CGSCC_NPM-NEXT: ret void
;
bb:
@@ -437,7 +393,7 @@ define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal5
;
; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1:[0-9]+]] {
+; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] {
; IS________NPM-NEXT: bb:
; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
@@ -615,25 +571,15 @@ define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_p
; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; IS________OPM-NEXT: ret void
;
-; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
-; IS__TUNIT_NPM-NEXT: bb:
-; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
-; IS__TUNIT_NPM-NEXT: ret void
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT: bb:
-; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64
-; IS__CGSCC_NPM-NEXT: ret void
+; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
+; IS________NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
+; IS________NPM-NEXT: bb:
+; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
+; IS________NPM-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
@@ -693,8 +639,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
-; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2
; IS__CGSCC_NPM-NEXT: ret void
;
bb:
@@ -719,25 +664,15 @@ define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_p
; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; IS________OPM-NEXT: ret void
;
-; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR4]] {
-; IS__TUNIT_NPM-NEXT: bb:
-; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
-; IS__TUNIT_NPM-NEXT: ret void
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR4]] {
-; IS__CGSCC_NPM-NEXT: bb:
-; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64
-; IS__CGSCC_NPM-NEXT: ret void
+; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
+; IS________NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] {
+; IS________NPM-NEXT: bb:
+; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
+; IS________NPM-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
@@ -797,8 +732,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
-; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
-; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2
; IS__CGSCC_NPM-NEXT: ret void
;
bb:
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
index d368e22811894..bd768df52ed76 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -120,8 +120,8 @@ define internal i32 @vfu2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwi
; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
; IS__CGSCC_NPM-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8
-; IS__CGSCC_NPM-NEXT: [[TMP6:%.*]] = zext i8 [[TMP0]] to i32
-; IS__CGSCC_NPM-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP1]]
+; IS__CGSCC_NPM-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; IS__CGSCC_NPM-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]]
; IS__CGSCC_NPM-NEXT: ret i32 [[TMP7]]
;
entry:
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
index 0cbf280ca14d8..cc69909f02aa2 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
@@ -78,7 +78,7 @@ define void @fn_no_null_opt(i32* %P, i1 %C) null_pointer_is_valid {
; IS__TUNIT____-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
; IS__TUNIT____: if.end:
; IS__TUNIT____-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4
+; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4294967296
; IS__TUNIT____-NEXT: store i32 [[TMP0]], i32* [[P]], align 4
; IS__TUNIT____-NEXT: br label [[FOR_COND1]]
; IS__TUNIT____: exit:
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
index 3ed00e2abf0aa..49cb9f85ef53a 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
;
diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll
index b0ed4239abdd9..f3a72796ecef5 100644
--- a/llvm/test/Transforms/Attributor/align.ll
+++ b/llvm/test/Transforms/Attributor/align.ll
@@ -1171,13 +1171,13 @@ define i8* @aligned_8_return_caller(i8* align(16) %a, i1 %c1, i1 %c2) {
; NOT_CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@aligned_8_return_caller
; NOT_CGSCC_OPM-SAME: (i8* nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR9]] {
-; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR12:[0-9]+]]
+; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR12:[0-9]+]]
; NOT_CGSCC_OPM-NEXT: ret i8* [[R]]
;
; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@aligned_8_return_caller
; IS__CGSCC_OPM-SAME: (i8* nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR10]] {
-; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR13:[0-9]+]]
+; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR13:[0-9]+]]
; IS__CGSCC_OPM-NEXT: ret i8* [[R]]
;
%r = call i8* @aligned_8_return(i8* %a, i1 %c1, i1 %c2)
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index 1342fd39fd67a..b69e78fbd268b 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
@@ -32,6 +32,7 @@ define internal i64 @f2(%"a"* %this) align 2 {
; IS__CGSCC____-LABEL: define {{[^@]+}}@f2
; IS__CGSCC____-SAME: () #[[ATTR0]] align 2 {
; IS__CGSCC____-NEXT: entry:
+; IS__CGSCC____-NEXT: [[TMP0:%.*]] = bitcast %a* undef to %b*
; IS__CGSCC____-NEXT: ret i64 undef
;
entry:
diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
index 24764f12a6475..03a8641f444de 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
@@ -111,7 +111,7 @@ define internal void @level2Kernela() {
; IS__TUNIT____-NEXT: entry:
; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
-; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR6:[0-9]+]]
+; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR6:[0-9]+]]
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: norecurse nosync nounwind
@@ -121,7 +121,7 @@ define internal void @level2Kernela() {
; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
-; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR4]]
+; IS__CGSCC____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR4]]
; IS__CGSCC____-NEXT: ret void
;
entry:
@@ -139,7 +139,7 @@ define internal void @level2Kernelb() {
; IS__TUNIT____-NEXT: entry:
; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
-; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR6]]
+; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR6]]
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: norecurse nosync nounwind
@@ -149,7 +149,7 @@ define internal void @level2Kernelb() {
; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
-; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR4]]
+; IS__CGSCC____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR4]]
; IS__CGSCC____-NEXT: ret void
;
entry:
@@ -303,7 +303,7 @@ define internal void @level2a(i32* %addr) {
; IS__TUNIT____-NEXT: entry:
; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]]
+; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]]
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind
@@ -313,7 +313,7 @@ define internal void @level2a(i32* %addr) {
; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4294967296
-; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR4]]
+; IS__CGSCC_OPM-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR4]]
; IS__CGSCC_OPM-NEXT: ret void
;
; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind
@@ -324,7 +324,7 @@ define internal void @level2a(i32* %addr) {
; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4
-; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR4]]
+; IS__CGSCC_NPM-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 17) #[[ATTR4]]
; IS__CGSCC_NPM-NEXT: ret void
;
entry:
@@ -342,7 +342,7 @@ define internal void @level2b(i32* %addr) {
; IS__TUNIT____-NEXT: entry:
; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]]
+; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]]
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind
@@ -352,7 +352,7 @@ define internal void @level2b(i32* %addr) {
; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4294967296
-; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR4]]
+; IS__CGSCC_OPM-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR4]]
; IS__CGSCC_OPM-NEXT: ret void
;
; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind
@@ -363,7 +363,7 @@ define internal void @level2b(i32* %addr) {
; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4
-; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR4]]
+; IS__CGSCC_NPM-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 17) #[[ATTR4]]
; IS__CGSCC_NPM-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index 2e68c60a3cff2..e1f550146c0ff 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -974,12 +974,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1106,9 +1107,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
@@ -1251,12 +1253,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1375,12 +1378,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1500,11 +1504,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
-; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1626,7 +1631,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU: if.then:
; AMDGPU-NEXT: br label [[RETURN:%.*]]
@@ -1949,12 +1955,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
; NVPTX-NEXT: ret void
;
;
@@ -2080,9 +2087,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
@@ -2224,12 +2232,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
; NVPTX-NEXT: ret void
;
;
@@ -2347,12 +2356,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
; NVPTX-NEXT: ret void
;
;
@@ -2471,11 +2481,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0)
-; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0)
; NVPTX-NEXT: ret void
;
;
@@ -2596,7 +2607,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX: if.then:
; NVPTX-NEXT: br label [[RETURN:%.*]]
@@ -2878,12 +2890,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2963,9 +2976,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3065,12 +3079,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3148,12 +3163,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3232,11 +3248,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
-; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3321,7 +3338,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU-DISABLED: if.then:
; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]]
@@ -3573,12 +3591,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3658,9 +3677,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3760,12 +3780,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3843,12 +3864,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3927,11 +3949,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
-; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -4016,7 +4039,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX-DISABLED: if.then:
; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]]