[llvm] 71c3acb - [Analysis][AArch64] Add cost model for loop.dependence.{war/raw}.mask (#167551)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 12 01:15:20 PST 2025


Author: Sam Tebbs
Date: 2025-12-12T09:15:16Z
New Revision: 71c3acb18b9e9d11badd5a11945cb1afc2fb540f

URL: https://github.com/llvm/llvm-project/commit/71c3acb18b9e9d11badd5a11945cb1afc2fb540f
DIFF: https://github.com/llvm/llvm-project/commit/71c3acb18b9e9d11badd5a11945cb1afc2fb540f.diff

LOG: [Analysis][AArch64] Add cost model for loop.dependence.{war/raw}.mask (#167551)

This PR adds the cost model for the loop dependence mask intrinsics,
both for cases where they must be expanded and when they can be lowered
for AArch64.

---------

Co-authored-by: Benjamin Maxwell <benjamin.maxwell at arm.com>

Added: 
    llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 129a1971981d5..0b60c38f0ce8c 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2139,6 +2139,56 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       // Otherwise, fallback to default scalarization cost.
       break;
     }
+    case Intrinsic::loop_dependence_war_mask:
+    case Intrinsic::loop_dependence_raw_mask: {
+      // Compute the cost of the expanded version of these intrinsics:
+      //
+      // The possible expansions are...
+      //
+      // loop_dependence_war_mask:
+      //   
diff  = (ptrB - ptrA) / eltSize
+      //   cmp = icmp sle 
diff , 0
+      //   upper_bound = select cmp, -1, 
diff 
+      //   mask = get_active_lane_mask 0, upper_bound
+      //
+      // loop_dependence_raw_mask:
+      //   
diff  = (abs(ptrB - ptrA)) / eltSize
+      //   cmp = icmp eq 
diff , 0
+      //   upper_bound = select cmp, -1, 
diff 
+      //   mask = get_active_lane_mask 0, upper_bound
+      //
+      auto *PtrTy = cast<PointerType>(ICA.getArgTypes()[0]);
+      Type *IntPtrTy = IntegerType::getIntNTy(
+          RetTy->getContext(), thisT()->getDataLayout().getPointerSizeInBits(
+                                   PtrTy->getAddressSpace()));
+      bool IsReadAfterWrite = IID == Intrinsic::loop_dependence_raw_mask;
+
+      InstructionCost Cost =
+          thisT()->getArithmeticInstrCost(Instruction::Sub, IntPtrTy, CostKind);
+      if (IsReadAfterWrite) {
+        IntrinsicCostAttributes AbsAttrs(Intrinsic::abs, IntPtrTy, {IntPtrTy},
+                                         {});
+        Cost += thisT()->getIntrinsicInstrCost(AbsAttrs, CostKind);
+      }
+
+      TTI::OperandValueInfo EltSizeOpInfo =
+          TTI::getOperandInfo(ICA.getArgs()[2]);
+      Cost += thisT()->getArithmeticInstrCost(Instruction::SDiv, IntPtrTy,
+                                              CostKind, {}, EltSizeOpInfo);
+
+      Type *CondTy = IntegerType::getInt1Ty(RetTy->getContext());
+      CmpInst::Predicate Pred =
+          IsReadAfterWrite ? CmpInst::ICMP_EQ : CmpInst::ICMP_SLE;
+      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CondTy,
+                                          IntPtrTy, Pred, CostKind);
+      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, IntPtrTy,
+                                          CondTy, Pred, CostKind);
+
+      IntrinsicCostAttributes Attrs(Intrinsic::get_active_lane_mask, RetTy,
+                                    {IntPtrTy, IntPtrTy}, FMF);
+      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
+      return Cost;
+    }
     }
 
     // Assume that we need to scalarize this intrinsic.)

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 043be554f8441..b8d408dc6623b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1064,6 +1064,19 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     }
     break;
   }
+  case Intrinsic::loop_dependence_raw_mask:
+  case Intrinsic::loop_dependence_war_mask: {
+    // The whilewr/rw instructions require SVE2 or SME.
+    if (ST->hasSVE2() || ST->hasSME()) {
+      EVT VecVT = getTLI()->getValueType(DL, RetTy);
+      unsigned EltSizeInBytes =
+          cast<ConstantInt>(ICA.getArgs()[2])->getZExtValue();
+      if (is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes) &&
+          VecVT.getVectorMinNumElements() == (16 / EltSizeInBytes))
+        return 1;
+    }
+    break;
+  }
   case Intrinsic::experimental_vector_extract_last_active:
     if (ST->isSVEorStreamingSVEAvailable()) {
       auto [LegalCost, _] = getTypeLegalizationCost(ICA.getArgTypes()[0]);

diff  --git a/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll
new file mode 100644
index 0000000000000..5b3070fcf347e
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-EXPANDED
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s --check-prefixes=CHECK,CHECK-SVE2
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sme | FileCheck %s --check-prefixes=CHECK,CHECK-SME
+
+; loop.dependence.{war,raw}.mask can be lowered to while{wr,rw} if SVE2 or SME is enabled.
+define void @loop_dependence_war_mask(ptr %a, ptr %b) {
+; CHECK-EXPANDED-LABEL: 'loop_dependence_war_mask'
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 1)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 2)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 4)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 8)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-LABEL: 'loop_dependence_war_mask'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 1)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
+  %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
+  %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
+  %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
+
+  %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
+  %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
+  %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
+  %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
+
+  ret void
+}
+
+define void @loop_dependence_raw_mask(ptr %a, ptr %b) {
+; CHECK-EXPANDED-LABEL: 'loop_dependence_raw_mask'
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 1)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 2)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 4)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 8)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-LABEL: 'loop_dependence_raw_mask'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 1)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
+  %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
+  %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
+  %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
+
+  %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
+  %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
+  %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
+  %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
+  ret void
+}
+
+; Invalid element size and return type combinations must be expanded, even with sve2/sme
+define void @loop_dependence_war_mask_invalid(ptr %a, ptr %b) {
+; CHECK-EXPANDED-LABEL: 'loop_dependence_war_mask_invalid'
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 8)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 4)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 2)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 1)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 10)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE2-LABEL: 'loop_dependence_war_mask_invalid'
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 8)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 4)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 2)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 1)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 10)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SME-LABEL: 'loop_dependence_war_mask_invalid'
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 8)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 4)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 2)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 1)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 10)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8)
+  %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4)
+  %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2)
+  %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1)
+  %res5 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10)
+
+  %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8)
+  %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4)
+  %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2)
+  %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1)
+  %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10)
+  ret void
+}
+
+define void @loop_dependence_raw_mask_invalid(ptr %a, ptr %b) {
+; CHECK-EXPANDED-LABEL: 'loop_dependence_raw_mask_invalid'
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 8)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 4)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 2)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 1)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 10)
+; CHECK-EXPANDED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE2-LABEL: 'loop_dependence_raw_mask_invalid'
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 8)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 4)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 2)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 1)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 10)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SME-LABEL: 'loop_dependence_raw_mask_invalid'
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 8)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 4)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 2)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 1)
+; CHECK-SME-NEXT:  Cost Model: Invalid cost for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 10)
+; CHECK-SME-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8)
+  %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4)
+  %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2)
+  %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1)
+  %res5 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10)
+
+  %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8)
+  %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4)
+  %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2)
+  %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1)
+  %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10)
+  ret void
+}


        


More information about the llvm-commits mailing list