[llvm] bb9449d - [InstCombine] Fold @llvm.experimental.get.vector.length when cnt <= max_lanes (#169293)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 23:16:08 PST 2025
Author: Luke Lau
Date: 2025-11-27T07:16:03Z
New Revision: bb9449d5bbd72441d8f95052ddfd29e2d29297d7
URL: https://github.com/llvm/llvm-project/commit/bb9449d5bbd72441d8f95052ddfd29e2d29297d7
DIFF: https://github.com/llvm/llvm-project/commit/bb9449d5bbd72441d8f95052ddfd29e2d29297d7.diff
LOG: [InstCombine] Fold @llvm.experimental.get.vector.length when cnt <= max_lanes (#169293)
On RISC-V, some loops that the loop vectorizer vectorizes pre-LTO may
turn out to have their exact trip count exposed after LTO (see #164762).
If the trip count is small enough, we can fold away the
@llvm.experimental.get.vector.length intrinsic based on this corollary
from the LangRef:
> If %cnt is less than or equal to %max_lanes, the return value is equal
> to %cnt.
On its own, this doesn't remove the @llvm.experimental.get.vector.length
call in #164762, since we also need to teach computeKnownBits about
@llvm.experimental.get.vector.length and the sub recurrence, but this PR
is a starting point.
I've added this in InstCombine rather than InstSimplify since we may
need to insert a truncation (@llvm.experimental.get.vector.length can
take an i64 %cnt argument, but the result is always i32).
Note that there was something similar done in VPlan in #167647 for when
the loop vectorizer knows the trip count.
Added:
llvm/test/Transforms/InstCombine/get_vector_length.ll
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9543d97616ae3..743c4f574e131 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4016,6 +4016,27 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::experimental_get_vector_length: {
+ // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
+ unsigned BitWidth =
+ std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
+ II->getType()->getScalarSizeInBits());
+ ConstantRange Cnt =
+ computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
+ SQ.getWithInstruction(II))
+ .zextOrTrunc(BitWidth);
+ ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
+ ->getValue()
+ .zextOrTrunc(Cnt.getBitWidth());
+ if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
+ MaxLanes = MaxLanes.multiply(
+ getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
+
+ if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
+ return replaceInstUsesWith(
+ *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
+ return nullptr;
+ }
default: {
// Handle target specific intrinsics
std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
diff --git a/llvm/test/Transforms/InstCombine/get_vector_length.ll b/llvm/test/Transforms/InstCombine/get_vector_length.ll
new file mode 100644
index 0000000000000..122beeae866f3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/get_vector_length.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine,verify -S | FileCheck %s
+
+define i32 @cnt_known_lt() {
+; CHECK-LABEL: define i32 @cnt_known_lt() {
+; CHECK-NEXT: ret i32 1
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 1, i32 2, i1 false)
+ ret i32 %x
+}
+
+define i32 @cnt_not_known_lt() {
+; CHECK-LABEL: define i32 @cnt_not_known_lt() {
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 2, i32 1, i1 false)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 false)
+ ret i32 %x
+}
+
+define i32 @cnt_known_lt_scalable() vscale_range(2, 4) {
+; CHECK-LABEL: define i32 @cnt_known_lt_scalable(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 2
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 true)
+ ret i32 %x
+}
+
+define i32 @cnt_not_known_lt_scalable() {
+; CHECK-LABEL: define i32 @cnt_not_known_lt_scalable() {
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 2, i32 1, i1 true)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 true)
+ ret i32 %x
+}
+
+define i32 @cnt_known_lt_runtime(i32 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[X]], 4
+; CHECK-NEXT: call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %icmp = icmp ule i32 %x, 3
+ call void @llvm.assume(i1 %icmp)
+ %y = call i32 @llvm.experimental.get.vector.length(i32 %x, i32 3, i1 false)
+ ret i32 %y
+}
+
+define i32 @cnt_known_lt_runtime_trunc(i64 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime_trunc(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[X]], 4
+; CHECK-NEXT: call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT: [[Y:%.*]] = trunc nuw nsw i64 [[X]] to i32
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %icmp = icmp ule i64 %x, 3
+ call void @llvm.assume(i1 %icmp)
+ %y = call i32 @llvm.experimental.get.vector.length(i64 %x, i32 3, i1 false)
+ ret i32 %y
+}
+
+; FIXME: We should be able to deduce the constant range from AssumptionCache
+; rather than relying on KnownBits, which in this case only knows x <= 3.
+define i32 @cnt_known_lt_runtime_assumption(i32 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime_assumption(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[X]], 3
+; CHECK-NEXT: call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT: [[Y:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[X]], i32 2, i1 false)
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %icmp = icmp ule i32 %x, 2
+ call void @llvm.assume(i1 %icmp)
+ %y = call i32 @llvm.experimental.get.vector.length(i32 %x, i32 2, i1 false)
+ ret i32 %y
+}
+
+
+define i32 @cnt_known_lt_i16() {
+; CHECK-LABEL: define i32 @cnt_known_lt_i16() {
+; CHECK-NEXT: ret i32 1
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i16 1, i32 2, i1 false)
+ ret i32 %x
+}
More information about the llvm-commits
mailing list