[llvm] 2c9e9ff - [SCCP] Handle llvm.experimental.get.vector.length calls (#169527)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 1 02:29:26 PST 2025
Author: Luke Lau
Date: 2025-12-01T10:29:21Z
New Revision: 2c9e9ffa77e37fa0ff5d15325dab5471636b8a44
URL: https://github.com/llvm/llvm-project/commit/2c9e9ffa77e37fa0ff5d15325dab5471636b8a44
DIFF: https://github.com/llvm/llvm-project/commit/2c9e9ffa77e37fa0ff5d15325dab5471636b8a44.diff
LOG: [SCCP] Handle llvm.experimental.get.vector.length calls (#169527)
As noted in the reproducer provided in
https://github.com/llvm/llvm-project/issues/164762#issuecomment-3554719231,
on RISC-V after LTO we sometimes have trip counts exposed to vectorized
loops. The loop vectorizer will have generated calls to
@llvm.experimental.get.vector.length, but there are [some
properties](https://llvm.org/docs/LangRef.html#id2399) about the
intrinsic we can use to simplify it:
- The result is always less than or equal to both Count and MaxLanes
- If Count <= MaxLanes, then the result is Count
This teaches SCCP to handle these cases with the intrinsic, which allows
some single-iteration-after-LTO loops to be unfolded.
#169293 is related and also simplifies the intrinsic in InstCombine via
computeKnownBits, but it can't fully remove the loop since
computeKnownBits only does limited reasoning on recurrences.
Added:
llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
Modified:
llvm/lib/Transforms/Utils/SCCPSolver.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 4947d03a2dc66..951bf1ca62fc2 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -2098,6 +2098,38 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
return (void)mergeInValue(ValueState[II], II,
ValueLatticeElement::getRange(Result));
}
+ if (II->getIntrinsicID() == Intrinsic::experimental_get_vector_length) {
+ Value *CountArg = II->getArgOperand(0);
+ Value *VF = II->getArgOperand(1);
+ bool Scalable = cast<ConstantInt>(II->getArgOperand(2))->isOne();
+
+ // Computation happens in the larger type.
+ unsigned BitWidth = std::max(CountArg->getType()->getScalarSizeInBits(),
+ VF->getType()->getScalarSizeInBits());
+
+ ConstantRange Count = getValueState(CountArg)
+ .asConstantRange(CountArg->getType(), false)
+ .zextOrTrunc(BitWidth);
+ ConstantRange MaxLanes = getValueState(VF)
+ .asConstantRange(VF->getType(), false)
+ .zextOrTrunc(BitWidth);
+ if (Scalable)
+ MaxLanes =
+ MaxLanes.multiply(getVScaleRange(II->getFunction(), BitWidth));
+
+ // The result is always less than both Count and MaxLanes.
+ ConstantRange Result(
+ APInt::getZero(BitWidth),
+ APIntOps::umin(Count.getUpper(), MaxLanes.getUpper()));
+
+ // If Count <= MaxLanes, getvectorlength(Count, MaxLanes) = Count
+ if (Count.icmp(CmpInst::ICMP_ULE, MaxLanes))
+ Result = Count;
+
+ Result = Result.zextOrTrunc(II->getType()->getScalarSizeInBits());
+ return (void)mergeInValue(ValueState[II], II,
+ ValueLatticeElement::getRange(Result));
+ }
if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
// Compute result range for intrinsics supported by ConstantRange.
diff --git a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
new file mode 100644
index 0000000000000..d0741161e729e
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -p sccp -S | FileCheck %s
+
+define i1 @result_le_count() {
+; CHECK-LABEL: define i1 @result_le_count() {
+; CHECK-NEXT: ret i1 true
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 3, i32 4, i1 false)
+ %res = icmp ule i32 %x, 3
+ ret i1 %res
+}
+
+define i1 @result_le_max_lanes(i32 %count) {
+; CHECK-LABEL: define i1 @result_le_max_lanes(
+; CHECK-SAME: i32 [[COUNT:%.*]]) {
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 3, i1 false)
+; CHECK-NEXT: ret i1 true
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 3, i1 false)
+ %res = icmp ule i32 %x, 3
+ ret i1 %res
+}
+
+define i1 @result_le_max_lanes_scalable(i32 %count) vscale_range(2, 4) {
+; CHECK-LABEL: define i1 @result_le_max_lanes_scalable(
+; CHECK-SAME: i32 [[COUNT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 4, i1 true)
+; CHECK-NEXT: ret i1 true
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 4, i1 true)
+ %res = icmp ule i32 %x, 16
+ ret i1 %res
+}
+
+define i32 @count_le_max_lanes() {
+; CHECK-LABEL: define i32 @count_le_max_lanes() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 4
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [4, %entry], [%iv.next, %loop]
+ %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false)
+ %iv.next = sub i32 %iv, %x
+ %ec = icmp eq i32 %iv.next, 0
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %x
+}
+
+; Can't simplify because %iv isn't <= max lanes.
+define i32 @count_not_le_max_lanes() {
+; CHECK-LABEL: define range(i32 0, 5) i32 @count_not_le_max_lanes() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 6, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 false)
+; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]]
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 [[X]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [6, %entry], [%iv.next, %loop]
+ %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false)
+ %iv.next = sub i32 %iv, %x
+ %ec = icmp eq i32 %iv.next, 0
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %x
+}
+
+define i32 @count_le_max_lanes_scalable_known() vscale_range(4, 8) {
+; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_known(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 16
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [16, %entry], [%iv.next, %loop]
+ %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true)
+ %iv.next = sub i32 %iv, %x
+ %ec = icmp eq i32 %iv.next, 0
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %x
+}
+
+; Can't simplify because %iv isn't guaranteed <= max lanes.
+define i32 @count_le_max_lanes_scalable_unknown() {
+; CHECK-LABEL: define range(i32 0, -1) i32 @count_le_max_lanes_scalable_unknown() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 16, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 true)
+; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]]
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 [[X]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [16, %entry], [%iv.next, %loop]
+ %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true)
+ %iv.next = sub i32 %iv, %x
+ %ec = icmp eq i32 %iv.next, 0
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %x
+}
+
+define i1 @result_le_overflow() {
+; CHECK-LABEL: define i1 @result_le_overflow() {
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 4294967296, i32 4, i1 false)
+; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 3
+; CHECK-NEXT: ret i1 [[RES]]
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i64 u0x100000000, i32 4, i1 false)
+ %res = icmp ule i32 %x, 3
+ ret i1 %res
+}
More information about the llvm-commits
mailing list