[llvm] [Analysis] Extend llvm.experimental.cttz.elts to type-based-cost (PR #184578)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 4 01:57:52 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Philipp Rados (prados-oc)

<details>
<summary>Changes</summary>

This patch fixes a crash on assertion when building the llvm-testsuite for `-march=rv64gcv_zvl16384b`.

This happens because VPlan builds a fixed-vector plan for the MVT `v2048i1`, which is illegal on RISC-V. `llvm.experimental.cttz.elts` then cannot be custom lowered and will be expanded, so `getTypeBasedIntrinsicInstrCost()` tries to scalarize it and crashes (on RISC-V; on AArch64 it just returns a cost of 1 because it recurses until all arguments are already scalar).
This occurs for all `llvm.experimental.cttz.elts` that have to be expanded.

This patch uses the existing value-based cost-modelling for the type-based path as well. Instead of getting the actual `ZeroIsPoison` value, it sets it to false, since that is the default that the vectorizer emits.

I haven't tested this for ARM targets, but if needed I can add tests for that as well.

---

Patch is 24.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184578.diff


2 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+53-37) 
- (modified) llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll (+125) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 6dcb6f0062a08..d30d5e0d815b5 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2100,44 +2100,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       if (!getTLI()->shouldExpandCttzElements(ArgType))
         return getTypeLegalizationCost(RetTy).first;
 
-      // TODO: The costs below reflect the expansion code in
-      // SelectionDAGBuilder, but we may want to sacrifice some accuracy in
-      // favour of compile time.
-
-      // Find the smallest "sensible" element type to use for the expansion.
       bool ZeroIsPoison = !cast<ConstantInt>(Args[1])->isZero();
-      ConstantRange VScaleRange(APInt(64, 1), APInt::getZero(64));
-      if (isa<ScalableVectorType>(ICA.getArgTypes()[0]) && I && I->getCaller())
-        VScaleRange = getVScaleRange(I->getCaller(), 64);
-
-      unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
-          RetTy, ArgType.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
-      Type *NewEltTy = IntegerType::getIntNTy(RetTy->getContext(), EltWidth);
-
-      // Create the new vector type & get the vector length
-      Type *NewVecTy = VectorType::get(
-          NewEltTy, cast<VectorType>(Args[0]->getType())->getElementCount());
-
-      IntrinsicCostAttributes StepVecAttrs(Intrinsic::stepvector, NewVecTy, {},
-                                           FMF);
-      InstructionCost Cost =
-          thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind);
-
-      Cost +=
-          thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind);
-      Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy,
-                                        Args[0]->getType(),
-                                        TTI::CastContextHint::None, CostKind);
-      Cost +=
-          thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind);
-
-      IntrinsicCostAttributes ReducAttrs(Intrinsic::vector_reduce_umax,
-                                         NewEltTy, NewVecTy, FMF, I, 1);
-      Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind);
-      Cost +=
-          thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind);
-
-      return Cost;
+      return getCttzEltsCost(ICA, ZeroIsPoison, CostKind);
     }
     case Intrinsic::get_active_lane_mask:
     case Intrinsic::experimental_vector_match:
@@ -2602,6 +2566,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
                                           CmpInst::ICMP_ULT, CostKind);
       return Cost;
     }
+    case Intrinsic::experimental_cttz_elts:
+      // Cannot know ZeroIsPoison value in type-based cost-analysis.
+      // Since this Intrinsic is mostly generated by VPlan (which sets it to
+      // false) use false as sensible default.
+      return getCttzEltsCost(ICA, /*ZeroIsPoison=*/false, CostKind);
     case Intrinsic::experimental_memset_pattern:
       // This cost is set to match the cost of the memset_pattern16 libcall.
       // It should likely be re-evaluated after migration to this intrinsic
@@ -3415,6 +3384,53 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
   InstructionCost getVectorSplitCost() const { return 1; }
 
+  // TODO: The costs below reflect the expansion code in
+  // SelectionDAGBuilder, but we may want to sacrifice some accuracy in
+  // favour of compile time.
+  // This path should only be taken if Targets cannot custom lower this
+  // intrinsic.
+  InstructionCost getCttzEltsCost(const IntrinsicCostAttributes &ICA,
+                                  bool ZeroIsPoison,
+                                  TTI::TargetCostKind CostKind) const {
+    const IntrinsicInst *I = ICA.getInst();
+    Type *ArgTy = ICA.getArgTypes()[0];
+    EVT ArgType = getTLI()->getValueType(DL, ArgTy, true);
+    Type *RetTy = ICA.getReturnType();
+    FastMathFlags FMF = ICA.getFlags();
+
+    // Find the smallest "sensible" element type to use for the expansion.
+    ConstantRange VScaleRange(APInt(64, 1), APInt::getZero(64));
+    if (isa<ScalableVectorType>(ArgTy) && I && I->getCaller())
+      VScaleRange = getVScaleRange(I->getCaller(), 64);
+
+    unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
+        RetTy, ArgType.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
+    Type *NewEltTy = IntegerType::getIntNTy(RetTy->getContext(), EltWidth);
+
+    // Create the new vector type & get the vector length
+    Type *NewVecTy =
+        VectorType::get(NewEltTy, cast<VectorType>(ArgTy)->getElementCount());
+
+    IntrinsicCostAttributes StepVecAttrs(Intrinsic::stepvector, NewVecTy, {},
+                                         FMF);
+    InstructionCost Cost =
+        thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind);
+
+    Cost +=
+        thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind);
+    Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy, ArgTy,
+                                      TTI::CastContextHint::None, CostKind);
+    Cost +=
+        thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind);
+
+    IntrinsicCostAttributes ReducAttrs(Intrinsic::vector_reduce_umax, NewEltTy,
+                                       NewVecTy, FMF, I, 1);
+    Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind);
+    Cost +=
+        thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind);
+
+    return Cost;
+  }
   /// @}
 };
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll b/llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll
index 094d73ddd0581..9d88f664a40dc 100644
--- a/llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v -intrinsic-cost-strategy=type-based-intrinsic-cost | FileCheck %s --check-prefix=TYPE
 
 define void @foo_no_vscale_range() {
 ; CHECK-LABEL: 'foo_no_vscale_range'
@@ -32,6 +33,37 @@ define void @foo_no_vscale_range() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 334 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE-LABEL: 'foo_no_vscale_range'
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 661 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 334 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 661 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 334 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
   %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
@@ -98,6 +130,37 @@ define void @foo_vscale_range_2_16() vscale_range(2,16) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 171 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE-LABEL: 'foo_vscale_range_2_16'
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 171 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 171 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 171 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 171 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
+; TYPE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
   %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
@@ -131,3 +194,65 @@ define void @foo_vscale_range_2_16() vscale_range(2,16) {
 
   ret void
 }
+
+define void @foo_fixed_len_vectors() {
+; CHECK-LABEL: 'foo_fixed_len_vectors'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res.i32.v2i1.false = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res.i32.v4i1.false = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res.i32.v8i1.false = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res.i32.v64i1.false = call i32 @llvm.experimental.cttz.elts.i32.v64i1(<64 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res.i32.v128i1.false = call i32 @llvm.experimental.cttz.elts.i32.v128i1(<128 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 663 for instruction: %res.i32.v1024i1.false = call i32 @llvm.experimental.cttz.elts.i32.v1024i1(<1024 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1319 for instruction: %res.i32.v2048i1.false = call i32 @llvm.experimental.cttz.elts.i32.v2048i1(<2048 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i32.v2i1.true = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1319 for instruction: %res.i32.v2048i1.true = call i32 @llvm.experimental.cttz.elts.i32.v2048i1(<2048 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %res.i32.v2i32 = call i32 @llvm.experimental.cttz.elts.i32.v2i32(<2 x i32> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %res.i32.v4i32 = call i32 @llvm.experimental.cttz.elts.i32.v4i32(<4 x i32> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 171 for instruction: %res.i32.v32i32 = call i32 @llvm.experimental.cttz.elts.i32.v32i32(<32 x ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/184578


More information about the llvm-commits mailing list