[llvm] ff31020 - [OpaquePtr][LoopAccessAnalysis] Support opaque pointers
Arthur Eubanks via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 9 09:15:33 PST 2022
Author: Arthur Eubanks
Date: 2022-02-09T09:11:27-08:00
New Revision: ff31020ee651d4867c5f1910fa0ded11cf8c2b13
URL: https://github.com/llvm/llvm-project/commit/ff31020ee651d4867c5f1910fa0ded11cf8c2b13
DIFF: https://github.com/llvm/llvm-project/commit/ff31020ee651d4867c5f1910fa0ded11cf8c2b13.diff
LOG: [OpaquePtr][LoopAccessAnalysis] Support opaque pointers
Previously we relied on the pointee type to determine what type we need
to do runtime pointer access checks.
With opaque pointers, we can access a pointer with more than one type,
so now we keep track of all the types we're accessing a pointer's
memory with.
Also some other minor getPointerElementType() removals.
Reviewed By: #opaque-pointers, nikic
Differential Revision: https://reviews.llvm.org/D119047
Added:
llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
Modified:
llvm/include/llvm/Analysis/LoopAccessAnalysis.h
llvm/lib/Analysis/LoopAccessAnalysis.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 29587df5de95..3724fd49ba92 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -406,8 +406,8 @@ class RuntimePointerChecking {
/// according to the assumptions that we've made during the analysis.
/// The method might also version the pointer stride according to \p Strides,
/// and add new predicates to \p PSE.
- void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, const ValueToValueMap &Strides,
+ void insert(Loop *Lp, Value *Ptr, Type *AccessTy, bool WritePtr,
+ unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides,
PredicatedScalarEvolution &PSE);
/// No run-time memory checking is necessary.
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 88c437d9bcf7..828877926ea8 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -189,8 +189,9 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId, unsigned ASId,
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
+ bool WritePtr, unsigned DepSetId,
+ unsigned ASId,
const ValueToValueMap &Strides,
PredicatedScalarEvolution &PSE) {
// Get the stride replaced scev.
@@ -227,8 +228,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
// Add the size of the pointed element to ScEnd.
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
Type *IdxTy = DL.getIndexType(Ptr->getType());
- const SCEV *EltSizeSCEV =
- SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
+ const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
@@ -522,19 +522,19 @@ class AccessAnalysis {
: TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
/// Register a load and whether it is only read from.
- void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
+ void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, false));
+ Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
/// Register a store.
- void addStore(MemoryLocation &Loc) {
+ void addStore(MemoryLocation &Loc, Type *AccessTy) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, true));
+ Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
}
/// Check if we can emit a run-time no-alias check for \p Access.
@@ -545,12 +545,11 @@ class AccessAnalysis {
/// we will attempt to use additional run-time checks in order to get
/// the bounds of the pointer.
bool createCheckForAccess(RuntimePointerChecking &RtCheck,
- MemAccessInfo Access,
+ MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &Strides,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
- unsigned ASId, bool ShouldCheckStride,
- bool Assume);
+ unsigned ASId, bool ShouldCheckStride, bool Assume);
/// Check whether we can check the pointers at runtime for
/// non-intersection.
@@ -583,14 +582,15 @@ class AccessAnalysis {
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
private:
- typedef SetVector<MemAccessInfo> PtrAccessSet;
+ typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
/// Go over all memory access and check whether runtime pointer checks
/// are needed and build sets of dependency check candidates.
void processMemAccesses();
- /// Set of all accesses.
- PtrAccessSet Accesses;
+ /// Map of all accesses. Values are the types used to access memory pointed to
+ /// by the pointer.
+ PtrAccessMap Accesses;
/// The loop being checked.
const Loop *TheLoop;
@@ -652,12 +652,12 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
+ const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
+ Loop *L) {
const SCEV *PtrScev = PSE.getSCEV(Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- Type *AccessTy = Ptr->getType()->getPointerElementType();
int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
@@ -689,7 +689,7 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
}
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
- MemAccessInfo Access,
+ MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &StridesMap,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
@@ -702,7 +702,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
- if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
+ if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
auto *Expr = PSE.getSCEV(Ptr);
if (!Assume || !isa<SCEVAddRecExpr>(Expr))
return false;
@@ -723,11 +723,11 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
DepId = RunningDepId++;
bool IsWrite = Access.getInt();
- RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
+ RtCheck.insert(TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE);
LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
return true;
- }
+}
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
ScalarEvolution *SE, Loop *TheLoop,
@@ -788,12 +788,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
}
for (auto &Access : AccessInfos) {
- if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
- RunningDepId, ASId, ShouldCheckWrap, false)) {
- LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
- << *Access.getPointer() << '\n');
- Retries.push_back(Access);
- CanDoAliasSetRT = false;
+ for (auto &AccessTy : Accesses[Access]) {
+ if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+ DepSetId, TheLoop, RunningDepId, ASId,
+ ShouldCheckWrap, false)) {
+ LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
+ << *Access.getPointer() << '\n');
+ Retries.push_back(Access);
+ CanDoAliasSetRT = false;
+ }
}
}
@@ -815,13 +818,16 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// We know that we need these checks, so we can now be more aggressive
// and add further checks if required (overflow checks).
CanDoAliasSetRT = true;
- for (auto Access : Retries)
- if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
- TheLoop, RunningDepId, ASId,
- ShouldCheckWrap, /*Assume=*/true)) {
- CanDoAliasSetRT = false;
- break;
+ for (auto Access : Retries) {
+ for (auto &AccessTy : Accesses[Access]) {
+ if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+ DepSetId, TheLoop, RunningDepId, ASId,
+ ShouldCheckWrap, /*Assume=*/true)) {
+ CanDoAliasSetRT = false;
+ break;
+ }
}
+ }
}
CanDoRT &= CanDoAliasSetRT;
@@ -886,9 +892,12 @@ void AccessAnalysis::processMemAccesses() {
LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
LLVM_DEBUG({
for (auto A : Accesses)
- dbgs() << "\t" << *A.getPointer() << " (" <<
- (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
- "read-only" : "read")) << ")\n";
+ dbgs() << "\t" << *A.first.getPointer() << " ("
+ << (A.first.getInt()
+ ? "write"
+ : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
+ : "read"))
+ << ")\n";
});
// The AliasSetTracker has nicely partitioned our pointers by metadata
@@ -907,13 +916,13 @@ void AccessAnalysis::processMemAccesses() {
UnderlyingObjToAccessMap ObjToLastAccess;
// Set of access to check after all writes have been processed.
- PtrAccessSet DeferredAccesses;
+ PtrAccessMap DeferredAccesses;
// Iterate over each alias set twice, once to process read/write pointers,
// and then to process read-only pointers.
for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
bool UseDeferred = SetIteration > 0;
- PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+ PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
for (const auto &AV : AS) {
Value *Ptr = AV.getValue();
@@ -921,10 +930,10 @@ void AccessAnalysis::processMemAccesses() {
// For a single memory access in AliasSetTracker, Accesses may contain
// both read and write, and they both need to be handled for CheckDeps.
for (const auto &AC : S) {
- if (AC.getPointer() != Ptr)
+ if (AC.first.getPointer() != Ptr)
continue;
- bool IsWrite = AC.getInt();
+ bool IsWrite = AC.first.getInt();
// If we're using the deferred access set, then it contains only
// reads.
@@ -946,7 +955,9 @@ void AccessAnalysis::processMemAccesses() {
// consecutive as "read-only" pointers (so that we check
// "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
if (!UseDeferred && IsReadOnlyPtr) {
- DeferredAccesses.insert(Access);
+ // We only use the pointer keys, the types vector values don't
+ // matter.
+ DeferredAccesses.insert({Access, {}});
continue;
}
@@ -1518,8 +1529,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
+ Type *ATy = getLoadStoreType(InstMap[AIdx]);
+ Type *BTy = getLoadStoreType(InstMap[BIdx]);
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
@@ -1842,8 +1853,6 @@ bool LoopAccessInfo::canAnalyzeLoop() {
void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
const TargetLibraryInfo *TLI,
DominatorTree *DT) {
- typedef SmallPtrSet<Value*, 16> ValueSet;
-
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
@@ -1975,11 +1984,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// for read and once for write, it will only appear once (on the write
// list). This is okay, since we are going to check for conflicts between
// writes and between reads and writes, but not between reads and reads.
- ValueSet Seen;
+ SmallSet<std::pair<Value *, Type *>, 16> Seen;
// Record uniform store addresses to identify if we have multiple stores
// to the same address.
- ValueSet UniformStores;
+ SmallPtrSet<Value *, 16> UniformStores;
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
@@ -1990,7 +1999,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
- if (Seen.insert(Ptr).second) {
+ Type *AccessTy = getLoadStoreType(ST);
+ if (Seen.insert({Ptr, AccessTy}).second) {
++NumReadWrites;
MemoryLocation Loc = MemoryLocation::get(ST);
@@ -2001,9 +2011,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
- [&Accesses, Loc](Value *Ptr) {
+ [&Accesses, AccessTy, Loc](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
- Accesses.addStore(NewLoc);
+ Accesses.addStore(NewLoc, AccessTy);
});
}
}
@@ -2027,7 +2037,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second ||
+ Type *AccessTy = getLoadStoreType(LD);
+ if (Seen.insert({Ptr, AccessTy}).second ||
!getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
@@ -2049,9 +2060,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
- [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
+ [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
- Accesses.addLoad(NewLoc, IsReadOnlyPtr);
+ Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
});
}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
new file mode 100644
index 000000000000..f09433069ad2
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
@@ -0,0 +1,179 @@
+; RUN: opt -S -disable-output --opaque-pointers -passes='print-access-info' < %s 2>&1 | FileCheck %s
+
+; In the function below some of the accesses are done as float types and some
+; are done as i32 types. When doing dependence analysis the type should not
+; matter if it can be determined that they are the same size.
+
+%int_pair = type { i32, i32 }
+
+; CHECK-LABEL: function 'backdep_type_size_equivalence':
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store float %val, ptr %gep.iv.min.100, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: store float %val, ptr %gep.iv.min.100, align 8 ->
+; CHECK-NEXT: store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+
+define void @backdep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+ ;; Load from vec[indvars.iv].x as float
+ %gep.iv = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 0
+ %ld.f32 = load float, ptr %gep.iv, align 8
+ %val = fmul fast float %ld.f32, 5.0
+
+ ;; Store to vec[indvars.iv - 100].x as float
+ %indvars.iv.min.100 = add nsw i64 %indvars.iv, -100
+ %gep.iv.min.100 = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv.min.100, i32 0
+ store float %val, ptr %gep.iv.min.100, align 8
+
+ ;; Store to vec[indvars.iv].x as i32, creating a backward dependency between
+;; the two stores with different element types but the same element size.
+ %indvars.iv.i32 = trunc i64 %indvars.iv to i32
+ store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+
+ ;; Store to vec[indvars.iv].y as i32, strided accesses should be independent
+;; between the two stores with different element types but the same element size.
+ %gep.iv.1 = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 1
+ store i32 %indvars.iv.i32, ptr %gep.iv.1, align 8
+
+ ;; Store to vec[indvars.iv + n].y as i32, to verify no dependence in the case
+ ;; of unknown dependence distance.
+ %indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
+ %gep.iv.n = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv.n, i32 1
+ store i32 %indvars.iv.i32, ptr %gep.iv.n, align 8
+
+ ;; Loop condition.
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %cond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; In the function below one of the accesses is done as i19 type, which has a
+; different store size than the i32 type, even though their alloc sizes are
+; equivalent. This is a negative test to ensure that they are not analyzed as
+; in the tests above.
+;
+; CHECK-LABEL: function 'backdep_type_store_size_equivalence':
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop.
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store i19 %indvars.iv.i19, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+
+define void @backdep_type_store_size_equivalence(ptr nocapture %vec, i64 %n) {
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+ ;; Load from vec[indvars.iv].x as float
+ %gep.iv = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 0
+ %ld.f32 = load float, ptr %gep.iv, align 8
+ %val = fmul fast float %ld.f32, 5.0
+
+ ;; Store to vec[indvars.iv].x as i19.
+ %indvars.iv.i19 = trunc i64 %indvars.iv to i19
+ store i19 %indvars.iv.i19, ptr %gep.iv, align 8
+
+ ;; Loop condition.
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %cond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; In the function below some of the accesses are done as double types and some
+; are done as i64 and i32 types. This is a negative test to ensure that they
+; are not analyzed as in the tests above.
+
+; CHECK-LABEL: function 'neg_dist_dep_type_size_equivalence':
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop.
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
+; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: ForwardButPreventsForwarding:
+; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8 ->
+; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8 ->
+; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+
+define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+ ;; Load from vec[indvars.iv] as double
+ %gep.iv = getelementptr i64, ptr %vec, i64 %indvars.iv
+ %ld.f64 = load double, ptr %gep.iv, align 8
+ %val = fmul fast double %ld.f64, 5.0
+
+ ;; Store to vec[indvars.iv + 101] as double
+ %indvars.iv.101 = add nsw i64 %indvars.iv, 101
+ %gep.iv.101 = getelementptr i64, ptr %vec, i64 %indvars.iv.101
+ store double %val, ptr %gep.iv.101, align 8
+
+ ;; Read from vec[indvars.iv] as i64 creating
+ ;; a forward but prevents forwarding dependence
+;; with different types but same sizes.
+ %ld.i64 = load i64, ptr %gep.iv, align 8
+
+ ;; Different sizes
+ %indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
+ %gep.iv.n = getelementptr inbounds i64, ptr %vec, i64 %indvars.iv.n
+ %ld.i64.i32 = trunc i64 %ld.i64 to i32
+ store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+
+ ;; Loop condition.
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %cond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list