[llvm] [AArch64][SVE] Use loop.dependence.war.mask in vector.memcheck (PR #175943)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 08:33:22 PST 2026
================
@@ -2173,38 +2173,85 @@ Value *llvm::addRuntimeChecks(
return MemoryRuntimeCheck;
}
-Value *llvm::addDiffRuntimeChecks(
- Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
- function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
+Value *llvm::addDiffRuntimeChecks(Instruction *Loc,
+ ArrayRef<PointerDiffInfo> Checks,
+ SCEVExpander &Expander, ElementCount VF,
+ unsigned IC, const TargetTransformInfo &TTI,
+ TTI::TargetCostKind CostKind) {
LLVMContext &Ctx = Loc->getContext();
IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
ChkBuilder.SetInsertPoint(Loc);
+ Value *RuntimeVF = nullptr;
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
-
auto &SE = *Expander.getSE();
+
+ constexpr Intrinsic::ID LoopDeskMaskIID = Intrinsic::loop_dependence_war_mask;
+ auto LoopDepMaskIsCheap = [&](Type *MaskTy, Value *AccessSize) {
+ Value *NullPtr = ConstantPointerNull::get(PointerType::getUnqual(Ctx));
+ // The pointer values should not change the cost. The access size (constant)
+ // is needed to by targets to cost the mask.
+ IntrinsicCostAttributes ICA(LoopDeskMaskIID, MaskTy,
+ {NullPtr, NullPtr, AccessSize});
+ InstructionCost Cost = TTI.getIntrinsicInstrCost(ICA, CostKind);
+ return Cost.isValid() && Cost <= 1;
+ };
+
// Map to keep track of created compares, The key is the pair of operands for
// the compare, to allow detecting and re-using redundant compares.
DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+ Value *IsConflict;
+ Module *M = Loc->getModule();
Type *Ty = SinkStart->getType();
- // Compute VF * IC * AccessSize.
- auto *VFTimesICTimesSize =
- ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
- ConstantInt::get(Ty, IC * AccessSize));
- Value *Diff =
- Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
-
- // Check if the same compare has already been created earlier. In that case,
- // there is no need to check it again.
- Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
- if (IsConflict)
- continue;
+ Type *CheckTy = ChkBuilder.getIntNTy(Ty->getScalarSizeInBits());
+
+ VectorType *MaskTy = VectorType::get(ChkBuilder.getInt1Ty(), VF * IC);
+ Value *LoopAccessSize = ChkBuilder.getInt64(AccessSize);
+ if (!LoopDepMaskIsCheap(MaskTy, LoopAccessSize)) {
+ // Compute VF * IC * AccessSize.
+ if (!RuntimeVF)
+ RuntimeVF = ChkBuilder.CreateElementCount(CheckTy, VF);
+ auto *VFTimesICTimesSize = ChkBuilder.CreateMul(
+ RuntimeVF, ConstantInt::get(Ty, IC * AccessSize));
+ Value *Diff =
+ Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
+
+ // Check if the same compare has already been created earlier. In that
+ // case, there is no need to check it again.
+ IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
+ if (IsConflict)
+ continue;
- IsConflict =
- ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
- SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
+ IsConflict =
+ ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
+ SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
+ } else {
+ Function *LoopDepMaskIntr =
+ Intrinsic::getOrInsertDeclaration(M, LoopDeskMaskIID, {MaskTy});
+
+ Value *Src = Expander.expandCodeFor(SrcStart, Ty, Loc);
+ Value *SrcPtr = ChkBuilder.CreateIntToPtr(Src, ChkBuilder.getPtrTy());
+ Value *Sink = Expander.expandCodeFor(SinkStart, Ty, Loc);
+ Value *SinkPtr = ChkBuilder.CreateIntToPtr(Sink, ChkBuilder.getPtrTy());
+ Value *Mask = ChkBuilder.CreateCall(
+ LoopDepMaskIntr->getFunctionType(), LoopDepMaskIntr,
+ {SrcPtr, SinkPtr, LoopAccessSize}, "loop.dep.mask");
+
+ IsConflict = SeenCompares.lookup({Mask, nullptr});
+ if (IsConflict)
+ continue;
+
----------------
MacDue wrote:
This was not working, since the `SCEV` diff allows common offsets to fold away. I've reworked this so that we create `loop.dependence.war.mask(ptr null, ptr %diff)` (and cache on `%diff`). A later inst/DAG combine can fold `%diff` into the mask if it's a `sub` (TODO).
https://github.com/llvm/llvm-project/pull/175943
More information about the llvm-commits mailing list