[llvm] [AArch64][CostModel] Reduce cost of wider than legal get.active.lane.mask (PR #163786)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 16 06:51:16 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Kerry McLaughlin (kmclaughlin-arm)
<details>
<summary>Changes</summary>
getIntrinsicInstrCost should halve the cost returned by getTypeLegalizationCost
when the return type requires splitting, but we know that the whilelo
(predicate pair) instruction can be used.
---
Full diff: https://github.com/llvm/llvm-project/pull/163786.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+19-5)
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+54-23)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 479e34515fc8a..c0e9b24486b6f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -957,10 +957,24 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return TyL.first + ExtraCost;
}
case Intrinsic::get_active_lane_mask: {
- auto *RetTy = dyn_cast<FixedVectorType>(ICA.getReturnType());
- if (RetTy) {
- EVT RetVT = getTLI()->getValueType(DL, RetTy);
- EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
+ auto RetTy = cast<VectorType>(ICA.getReturnType());
+ EVT RetVT = getTLI()->getValueType(DL, RetTy);
+ EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
+ if (RetTy->isScalableTy()) {
+ // When SVE2p1 or SME2 is available, get_active_lane_mask will lower
+ // to the sve_whilelo_x2 intrinsic which returns a predicate pair.
+ // This means we can halve getTypeLegalizationCost, since the
+ // predicate pair intrinsic will split the result, e.g.
+ // nxv32i1 = get_active_lane_mask(base, idx) ->
+ // {nxv16i1, nxv16i1} = sve_whilelo_x2(base, idx)
+ if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) ||
+ (!ST->hasSVE2p1() && !ST->hasSME2()) ||
+ TLI->getTypeAction(RetTy->getContext(), RetVT) !=
+ TargetLowering::TypeSplitVector)
+ break;
+ auto LT = getTypeLegalizationCost(RetTy);
+ return LT.first / 2;
+ } else {
if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
!getTLI()->isTypeLegal(RetVT)) {
// We don't have enough context at this point to determine if the mask
@@ -972,7 +986,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// NOTE: getScalarizationOverhead returns a cost that's far too
// pessimistic for the actual generated codegen. In reality there are
// two instructions generated per lane.
- return RetTy->getNumElements() * 2;
+ return cast<FixedVectorType>(RetTy)->getNumElements() * 2;
}
}
break;
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 0976a108cfb2c..a3992b017f3c4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-1
-; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-2
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2p1 | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE2p1-OR-SME2
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sme2 | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE2p1-OR-SME2
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -intrinsic-cost-strategy=type-based-intrinsic-cost -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=TYPE_BASED_ONLY
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -920,6 +922,7 @@ define void @get_lane_mask() #0 {
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
@@ -934,28 +937,53 @@ define void @get_lane_mask() #0 {
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
-; CHECK-VSCALE-2-LABEL: 'get_lane_mask'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-SVE-LABEL: 'get_lane_mask'
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
+; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-SVE2p1-OR-SME2-LABEL: 'get_lane_mask'
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
+; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; TYPE_BASED_ONLY-LABEL: 'get_lane_mask'
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
@@ -966,6 +994,7 @@ define void @get_lane_mask() #0 {
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
@@ -990,6 +1019,7 @@ define void @get_lane_mask() #0 {
%mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
%mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
+ %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
%mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
%mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
@@ -1416,6 +1446,7 @@ declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32, i32)
+declare <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64, i64)
declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64, i64)
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16, i16)
declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)
``````````
</details>
https://github.com/llvm/llvm-project/pull/163786
More information about the llvm-commits
mailing list