[llvm] [AArch64][SVE] Use loop.dependence.war.mask in vector.memcheck (PR #175943)

Benjamin Maxwell via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 10 08:33:22 PST 2026


================
@@ -2173,38 +2173,85 @@ Value *llvm::addRuntimeChecks(
   return MemoryRuntimeCheck;
 }
 
-Value *llvm::addDiffRuntimeChecks(
-    Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
-    function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
+Value *llvm::addDiffRuntimeChecks(Instruction *Loc,
+                                  ArrayRef<PointerDiffInfo> Checks,
+                                  SCEVExpander &Expander, ElementCount VF,
+                                  unsigned IC, const TargetTransformInfo &TTI,
+                                  TTI::TargetCostKind CostKind) {
 
   LLVMContext &Ctx = Loc->getContext();
   IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
   ChkBuilder.SetInsertPoint(Loc);
+  Value *RuntimeVF = nullptr;
   // Our instructions might fold to a constant.
   Value *MemoryRuntimeCheck = nullptr;
-
   auto &SE = *Expander.getSE();
+
+  constexpr Intrinsic::ID LoopDeskMaskIID = Intrinsic::loop_dependence_war_mask;
+  auto LoopDepMaskIsCheap = [&](Type *MaskTy, Value *AccessSize) {
+    Value *NullPtr = ConstantPointerNull::get(PointerType::getUnqual(Ctx));
+    // The pointer values should not change the cost. The access size (constant)
+    // is needed by targets to cost the mask.
+    IntrinsicCostAttributes ICA(LoopDeskMaskIID, MaskTy,
+                                {NullPtr, NullPtr, AccessSize});
+    InstructionCost Cost = TTI.getIntrinsicInstrCost(ICA, CostKind);
+    return Cost.isValid() && Cost <= 1;
+  };
+
   // Map to keep track of created compares, The key is the pair of operands for
   // the compare, to allow detecting and re-using redundant compares.
   DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
   for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+    Value *IsConflict;
+    Module *M = Loc->getModule();
     Type *Ty = SinkStart->getType();
-    // Compute VF * IC * AccessSize.
-    auto *VFTimesICTimesSize =
-        ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
-                             ConstantInt::get(Ty, IC * AccessSize));
-    Value *Diff =
-        Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
-
-    // Check if the same compare has already been created earlier. In that case,
-    // there is no need to check it again.
-    Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
-    if (IsConflict)
-      continue;
+    Type *CheckTy = ChkBuilder.getIntNTy(Ty->getScalarSizeInBits());
+
+    VectorType *MaskTy = VectorType::get(ChkBuilder.getInt1Ty(), VF * IC);
+    Value *LoopAccessSize = ChkBuilder.getInt64(AccessSize);
+    if (!LoopDepMaskIsCheap(MaskTy, LoopAccessSize)) {
+      // Compute VF * IC * AccessSize.
+      if (!RuntimeVF)
+        RuntimeVF = ChkBuilder.CreateElementCount(CheckTy, VF);
+      auto *VFTimesICTimesSize = ChkBuilder.CreateMul(
+          RuntimeVF, ConstantInt::get(Ty, IC * AccessSize));
+      Value *Diff =
+          Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
+
+      // Check if the same compare has already been created earlier. In that
+      // case, there is no need to check it again.
+      IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
+      if (IsConflict)
+        continue;
 
-    IsConflict =
-        ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
-    SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
+      IsConflict =
+          ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
+      SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
+    } else {
+      Function *LoopDepMaskIntr =
+          Intrinsic::getOrInsertDeclaration(M, LoopDeskMaskIID, {MaskTy});
+
+      Value *Src = Expander.expandCodeFor(SrcStart, Ty, Loc);
+      Value *SrcPtr = ChkBuilder.CreateIntToPtr(Src, ChkBuilder.getPtrTy());
+      Value *Sink = Expander.expandCodeFor(SinkStart, Ty, Loc);
+      Value *SinkPtr = ChkBuilder.CreateIntToPtr(Sink, ChkBuilder.getPtrTy());
+      Value *Mask = ChkBuilder.CreateCall(
+          LoopDepMaskIntr->getFunctionType(), LoopDepMaskIntr,
+          {SrcPtr, SinkPtr, LoopAccessSize}, "loop.dep.mask");
+
+      IsConflict = SeenCompares.lookup({Mask, nullptr});
+      if (IsConflict)
+        continue;
+
----------------
MacDue wrote:

This was not working (since the `SCEV` diff allows for common offsets to fold away). I've reworked this so that we create `loop.dependence.war.mask(ptr null, ptr %diff)` (and cache on `%diff`). A later inst/DAG combine can fold `%diff` into the mask if it's a `sub` (TODO).

https://github.com/llvm/llvm-project/pull/175943


More information about the llvm-commits mailing list