[llvm] cad62df - [Loads] Support dereferenceable assumption with variable size. (#128436)
Author: Florian Hahn
Date: 2025-07-14T08:17:33+01:00
New Revision: cad62df49a79df5e5136cfad280c5abc9f62c60b
URL: https://github.com/llvm/llvm-project/commit/cad62df49a79df5e5136cfad280c5abc9f62c60b
DIFF: https://github.com/llvm/llvm-project/commit/cad62df49a79df5e5136cfad280c5abc9f62c60b.diff
LOG: [Loads] Support dereferenceable assumption with variable size. (#128436)
Update isDereferenceableAndAlignedPointer to make use of dereferenceable
assumptions with variable sizes via SCEV.
To do so, factor out the logic for checking dereferenceability via an
assumption into a helper, and use ScalarEvolution (SE) to check that the
access size does not exceed the assumed dereferenceable size.
PR: https://github.com/llvm/llvm-project/pull/128436
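For illustration, a minimal sketch of the pattern this enables (%p and %n are
placeholder names, mirroring the tests added below): the size operand of the
"dereferenceable" bundle may now be a variable IR value rather than a
constant, and the analysis reasons about it through SCEV.

  declare void @llvm.assume(i1)

  define i8 @sketch(ptr %p, i64 %n) nofree nosync {
  entry:
    ; %n bytes starting at %p are assumed dereferenceable; %n need not be
    ; a compile-time constant.
    call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 4), "dereferenceable"(ptr %p, i64 %n) ]
    %l = load i8, ptr %p, align 1
    ret i8 %l
  }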
Added:
llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
Modified:
llvm/include/llvm/Analysis/AssumeBundleQueries.h
llvm/lib/Analysis/AssumeBundleQueries.cpp
llvm/lib/Analysis/Loads.cpp
llvm/lib/IR/Verifier.cpp
llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
llvm/test/Verifier/assume-bundles.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
index 0d39339c4f7d7..eb537e5f11afc 100644
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -102,10 +102,14 @@ LLVM_ABI void fillMapFromAssume(AssumeInst &Assume,
struct RetainedKnowledge {
Attribute::AttrKind AttrKind = Attribute::None;
uint64_t ArgValue = 0;
+ Value *IRArgValue = nullptr;
Value *WasOn = nullptr;
+ RetainedKnowledge(Attribute::AttrKind AttrKind = Attribute::None,
+ uint64_t ArgValue = 0, Value *WasOn = nullptr)
+ : AttrKind(AttrKind), ArgValue(ArgValue), WasOn(WasOn) {}
bool operator==(RetainedKnowledge Other) const {
return AttrKind == Other.AttrKind && WasOn == Other.WasOn &&
- ArgValue == Other.ArgValue;
+ ArgValue == Other.ArgValue && IRArgValue == Other.IRArgValue;
}
bool operator!=(RetainedKnowledge Other) const { return !(*this == Other); }
/// This is only intended for use in std::min/std::max between attribute that
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index 4c890a5d21b54..aa1cb3c26d63f 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -114,6 +114,9 @@ llvm::getKnowledgeFromBundle(AssumeInst &Assume,
};
if (BOI.End - BOI.Begin > ABA_Argument)
Result.ArgValue = GetArgOr1(0);
+ Result.IRArgValue = bundleHasArgument(BOI, ABA_Argument)
+ ? getValueFromBundleOpInfo(Assume, BOI, ABA_Argument)
+ : nullptr;
if (Result.AttrKind == Attribute::Alignment)
if (BOI.End - BOI.Begin > ABA_Argument + 1)
Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1));
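As a reading aid (not part of the patch itself): for a bundle such as

  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %n) ]

the returned RetainedKnowledge now has WasOn = %p and IRArgValue = %n, while
the existing constant path is unchanged (GetArgOr1 falls back to 1 when the
size operand is not a ConstantInt, so ArgValue stays conservative for
variable sizes).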
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 880249588f0b2..89e42ff4e8029 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -31,6 +31,40 @@ static bool isAligned(const Value *Base, Align Alignment,
return Base->getPointerAlignment(DL) >= Alignment;
}
+static bool isDereferenceableAndAlignedPointerViaAssumption(
+ const Value *Ptr, Align Alignment,
+ function_ref<bool(const RetainedKnowledge &RK)> CheckSize,
+ const DataLayout &DL, const Instruction *CtxI, AssumptionCache *AC,
+ const DominatorTree *DT) {
+ // Dereferenceable information from assumptions is only valid if the value
+ // cannot be freed between the assumption and use. For now just use the
+ // information for values that cannot be freed in the function.
+ // TODO: More precisely check if the pointer can be freed between assumption
+ // and use.
+ if (!CtxI || Ptr->canBeFreed())
+ return false;
+ /// Look through assumes to see if both dereferenceability and alignment can
+ /// be proven by an assume if needed.
+ RetainedKnowledge AlignRK;
+ RetainedKnowledge DerefRK;
+ bool IsAligned = Ptr->getPointerAlignment(DL) >= Alignment;
+ return getKnowledgeForValue(
+ Ptr, {Attribute::Dereferenceable, Attribute::Alignment}, *AC,
+ [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+ if (!isValidAssumeForContext(Assume, CtxI, DT))
+ return false;
+ if (RK.AttrKind == Attribute::Alignment)
+ AlignRK = std::max(AlignRK, RK);
+ if (RK.AttrKind == Attribute::Dereferenceable)
+ DerefRK = std::max(DerefRK, RK);
+ IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
+ if (IsAligned && DerefRK && CheckSize(DerefRK))
+ return true; // We have found what we needed so we stop looking
+ return false; // Other assumes may have better information, so
+ // keep looking.
+ });
+}
+
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
static bool isDereferenceableAndAlignedPointer(
@@ -169,38 +203,12 @@ static bool isDereferenceableAndAlignedPointer(
Size, DL, CtxI, AC, DT, TLI,
Visited, MaxDepth);
- // Dereferenceable information from assumptions is only valid if the value
- // cannot be freed between the assumption and use. For now just use the
- // information for values that cannot be freed in the function.
- // TODO: More precisely check if the pointer can be freed between assumption
- // and use.
- if (CtxI && AC && !V->canBeFreed()) {
- /// Look through assumes to see if both dereferencability and alignment can
- /// be proven by an assume if needed.
- RetainedKnowledge AlignRK;
- RetainedKnowledge DerefRK;
- bool IsAligned = V->getPointerAlignment(DL) >= Alignment;
- if (getKnowledgeForValue(
- V, {Attribute::Dereferenceable, Attribute::Alignment}, *AC,
- [&](RetainedKnowledge RK, Instruction *Assume, auto) {
- if (!isValidAssumeForContext(Assume, CtxI, DT))
- return false;
- if (RK.AttrKind == Attribute::Alignment)
- AlignRK = std::max(AlignRK, RK);
- if (RK.AttrKind == Attribute::Dereferenceable)
- DerefRK = std::max(DerefRK, RK);
- IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
- if (IsAligned && DerefRK &&
- DerefRK.ArgValue >= Size.getZExtValue())
- return true; // We have found what we needed so we stop looking
- return false; // Other assumes may have better information. so
- // keep looking
- }))
- return true;
- }
-
- // If we don't know, assume the worst.
- return false;
+ return AC && isDereferenceableAndAlignedPointerViaAssumption(
+ V, Alignment,
+ [Size](const RetainedKnowledge &RK) {
+ return RK.ArgValue >= Size.getZExtValue();
+ },
+ DL, CtxI, AC, DT);
}
bool llvm::isDereferenceableAndAlignedPointer(
@@ -317,8 +325,8 @@ bool llvm::isDereferenceableAndAlignedInLoop(
return false;
const SCEV *MaxBECount =
- Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
- : SE.getConstantMaxBackedgeTakenCount(L);
+ Predicates ? SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates)
+ : SE.getSymbolicMaxBackedgeTakenCount(L);
const SCEV *BECount = Predicates
? SE.getPredicatedBackedgeTakenCount(L, *Predicates)
: SE.getBackedgeTakenCount(L);
@@ -339,9 +347,11 @@ bool llvm::isDereferenceableAndAlignedInLoop(
Value *Base = nullptr;
APInt AccessSize;
+ const SCEV *AccessSizeSCEV = nullptr;
if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) {
Base = NewBase->getValue();
AccessSize = MaxPtrDiff;
+ AccessSizeSCEV = PtrDiff;
} else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
if (MinAdd->getNumOperands() != 2)
return false;
@@ -365,12 +375,20 @@ bool llvm::isDereferenceableAndAlignedInLoop(
return false;
AccessSize = MaxPtrDiff + Offset->getAPInt();
+ AccessSizeSCEV = SE.getAddExpr(PtrDiff, Offset);
Base = NewBase->getValue();
} else
return false;
Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt();
- return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
+ return isDereferenceableAndAlignedPointerViaAssumption(
+ Base, Alignment,
+ [&SE, AccessSizeSCEV](const RetainedKnowledge &RK) {
+ return SE.isKnownPredicate(CmpInst::ICMP_ULE, AccessSizeSCEV,
+ SE.getSCEV(RK.IRArgValue));
+ },
+ DL, HeaderFirstNonPHI, AC, &DT) ||
+ isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
HeaderFirstNonPHI, AC, &DT);
}
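Concretely, using the key lines from the i32 test added below: for a loop
that loads i32 elements at indices 0..%n-1, the access size is 4 * %n bytes,
so the assumed dereferenceable size must provably be at least that large for
the SE.isKnownPredicate(ICMP_ULE, ...) check to succeed:

    %mul = mul nsw nuw i64 %n, 4
    call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %mul) ]

An assumption of only %n bytes over the same loop (the too_small variant
below) cannot be shown to cover 4 * %n bytes, so those loads stay predicated.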
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index eb747bc48a8a5..8004077b92665 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5604,6 +5604,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"third argument should be an integer if present", Call);
continue;
}
+ if (Kind == Attribute::Dereferenceable) {
+ Check(ArgCount == 2,
+ "dereferenceable assumptions should have 2 arguments", Call);
+ Check(Call.getOperand(Elem.Begin)->getType()->isPointerTy(),
+ "first argument should be a pointer", Call);
+ Check(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(),
+ "second argument should be an integer", Call);
+ continue;
+ }
Check(ArgCount <= 2, "too many arguments", Call);
if (Kind == Attribute::None)
break;
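In IR terms the updated checks mean, for example (mirroring the
assume-bundles.ll update below):

  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %P, i32 %P1) ]      ; now accepted: variable size
  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %P, float 1.5) ]    ; rejected: second argument should be an integer
  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %P, i32 8, i32 8) ] ; rejected: should have 2 arguments
  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %P) ]               ; rejected: should have 2 arguments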
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
new file mode 100644
index 0000000000000..e771c408358a1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
@@ -0,0 +1,557 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck %s
+
+declare void @llvm.assume(i1)
+
+; %a is known dereferenceable via assume for the whole loop.
+define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
+; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(
+; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i8> [[WIDE_LOAD]], <2 x i8> [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
+; CHECK-NEXT: store <2 x i8> [[PREDPHI]], ptr [[TMP7]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L_B:%.*]] = load i8, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[C_1:%.*]] = icmp sge i8 [[L_B]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
+; CHECK: [[LOOP_THEN]]:
+; CHECK-NEXT: [[L_A:%.*]] = load i8, ptr [[GEP_A]], align 1
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i8 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i8 [[MERGE]], ptr [[GEP_C]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %n) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv
+ %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv
+ %l.b = load i8, ptr %gep.b, align 1
+ %c.1 = icmp sge i8 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %l.a = load i8, ptr %gep.a, align 1
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i8 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i8, ptr %c, i64 %iv
+ store i8 %merge, ptr %gep.c, align 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; %a is known dereferenceable via assume for the whole loop.
+define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
+; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(
+; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[MUL]]) ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
+; CHECK: [[LOOP_THEN]]:
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %mul = mul nsw nuw i64 %n, 4
+ call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %mul) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+ %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+ %l.b = load i32, ptr %gep.b, align 1
+ %c.1 = icmp sge i32 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %l.a = load i32, ptr %gep.a, align 1
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+ store i32 %merge, ptr %gep.c, align 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+
+; %a is NOT known dereferenceable via assume for the whole loop.
+define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
+; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32(
+; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
+; CHECK: [[LOOP_THEN]]:
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %n) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+ %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+ %l.b = load i32, ptr %gep.b, align 1
+ %c.1 = icmp sge i32 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %l.a = load i32, ptr %gep.a, align 1
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+ store i32 %merge, ptr %gep.c, align 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; %a is NOT known dereferenceable via assume for the whole loop.
+define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
+; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32(
+; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 100) ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
+; CHECK: [[LOOP_THEN]]:
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 100) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+ %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+ %l.b = load i32, ptr %gep.b, align 1
+ %c.1 = icmp sge i32 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %l.a = load i32, ptr %gep.a, align 1
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+ store i32 %merge, ptr %gep.c, align 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; %a is known dereferenceable via assume for the whole loop, alignment is known via function attribute.
+define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_align_attribute(ptr noalias noundef align 4 %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
+; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_align_attribute(
+; CHECK-SAME: ptr noalias noundef align 4 [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 [[MUL]]) ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
+; CHECK: [[LOOP_THEN]]:
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %mul = mul nsw nuw i64 %n, 4
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 %mul) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+ %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+ %l.b = load i32, ptr %gep.b, align 4
+ %c.1 = icmp sge i32 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %l.a = load i32, ptr %gep.a, align 4
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+ store i32 %merge, ptr %gep.c, align 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; Alignment via argument attribute is too small (1 but needs 4).
+define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_align_attribute_too_small(ptr noalias noundef align 1 %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
+; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_align_attribute_too_small(
+; CHECK-SAME: ptr noalias noundef align 1 [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 [[MUL]]) ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
+; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
+; CHECK: [[LOOP_THEN]]:
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %mul = mul nsw nuw i64 %n, 4
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 %mul) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+ %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+ %l.b = load i32, ptr %gep.b, align 4
+ %c.1 = icmp sge i32 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %l.a = load i32, ptr %gep.a, align 4
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+ store i32 %merge, ptr %gep.c, align 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 13fd2eff87290..84d5ceeb601b6 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -165,6 +165,35 @@ exit:
ret void
}
+define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+entry:
+ %n_bytes = mul nuw nsw i32 %n, 2
+ call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ]
+ %tc = sext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, %tc
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
define void @loop_contains_store_to_pointer_with_no_deref_info(ptr align 2 dereferenceable(40) readonly %load.array, ptr align 2 noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_to_pointer_with_no_deref_info'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
index 0fe893abec86c..e79995f673fb4 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
@@ -94,3 +94,50 @@ loop.end:
%retval = phi i64 [ %index, %loop ], [ -1, %loop.inc ]
ret i64 %retval
}
+
+define i64 @early_exit_alignment_and_deref_known_via_assumption(ptr noalias %p1, ptr noalias %p2, i64 %n) nofree nosync {
+; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption(
+; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 [[N]]) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 [[N]]) ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ call void @llvm.assume(i1 true) [ "align"(ptr %p1, i64 4), "dereferenceable"(ptr %p1, i64 %n) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 4), "dereferenceable"(ptr %p2, i64 %n) ]
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, %n
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ -1, %loop.inc ]
+ ret i64 %retval
+}
diff --git a/llvm/test/Verifier/assume-bundles.ll b/llvm/test/Verifier/assume-bundles.ll
index 047e8d9bba3ed..d8037b965edb5 100644
--- a/llvm/test/Verifier/assume-bundles.ll
+++ b/llvm/test/Verifier/assume-bundles.ll
@@ -7,13 +7,13 @@ define void @func(ptr %P, i32 %P1, ptr %P2, ptr %P3) {
; CHECK: tags must be valid attribute names
; CHECK: "adazdazd"
call void @llvm.assume(i1 true) ["adazdazd"()]
-; CHECK: the second argument should be a constant integral value
+; CHECK-NOT: call{{.+}}deref
call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P, i32 %P1)]
-; CHECK: the second argument should be a constant integral value
+; CHECK: second argument should be an integer
call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P, float 1.5)]
-; CHECK: too many arguments
+; CHECK: dereferenceable assumptions should have 2 arguments
call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P, i32 8, i32 8)]
-; CHECK: this attribute should have 2 arguments
+; CHECK: dereferenceable assumptions should have 2 arguments
call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P)]
; CHECK: this attribute has no argument
call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P, i32 4), "cold"(ptr %P)]
@@ -30,7 +30,7 @@ define void @func(ptr %P, i32 %P1, ptr %P2, ptr %P3) {
call void @llvm.assume(i1 true) ["separate_storage"(ptr %P)]
; CHECK: arguments to separate_storage assumptions should be pointers
call void @llvm.assume(i1 true) ["separate_storage"(ptr %P, i32 123)]
-; CHECK: this attribute should have 2 arguments
+; CHECK: dereferenceable assumptions should have 2 arguments
call void @llvm.assume(i1 true) ["align"(ptr %P, i32 4), "dereferenceable"(ptr %P)]
ret void
}