[llvm] 9186df9 - [InlineCost] Simplify extractvalue across callsite (#145054)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 06:15:31 PDT 2025
Author: Tobias Stadler
Date: 2025-06-24T14:15:27+01:00
New Revision: 9186df9b088f425182c58c7eee23645a77d7591c
URL: https://github.com/llvm/llvm-project/commit/9186df9b088f425182c58c7eee23645a77d7591c
DIFF: https://github.com/llvm/llvm-project/commit/9186df9b088f425182c58c7eee23645a77d7591c.diff
LOG: [InlineCost] Simplify extractvalue across callsite (#145054)
Motivation: When using libc++, `std::bitset<64>::count()` doesn't
optimize to a single popcount instruction on AArch64, because we fail to
inline the library code completely. Inlining fails, because the internal
bit_iterator struct is passed as a [2 x i64] %arg value on AArch64. The
value is built using insertvalue instructions and only one of the array
entries is constant. If we know that this entry is constant, we can
prove that half the function becomes dead. However, InlineCost only
considers operands for simplification if they are Constants, which %arg
is not. Without this simplification the function is too expensive to
inline.
Therefore, we first had to teach InlineCost to support non-Constant simplified
values (PR #145083). This patch makes use of that for extractvalue: the
aggregate operand is looked up among the simplified values, so that an
extractvalue in the callee can be simplified against the insertvalue
instructions in the caller. This is enough to get bitset::count fully optimized.
There are similar opportunities we can explore for BinOps in the future
(e.g. icmp eq %arg1, %arg2 when the caller passes the same value into
both arguments), but we need to be careful here, because InstSimplify
isn't completely safe to use with operands owned by different functions.
Added:
llvm/test/Transforms/Inline/simplify-crosscallsite.ll
Modified:
llvm/lib/Analysis/InlineCost.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index fe1ceb74429c9..773a60479ae22 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -2316,9 +2316,18 @@ bool CallAnalyzer::visitStore(StoreInst &I) {
}
bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
- // Constant folding for extract value is trivial.
- if (simplifyInstruction(I))
- return true;
+ Value *Op = I.getAggregateOperand();
+
+ // Special handling, because we want to simplify extractvalue with a
+ // potential insertvalue from the caller.
+ if (Value *SimpleOp = getSimplifiedValueUnchecked(Op)) {
+ SimplifyQuery SQ(DL);
+ Value *SimpleV = simplifyExtractValueInst(SimpleOp, I.getIndices(), SQ);
+ if (SimpleV) {
+ SimplifiedValues[&I] = SimpleV;
+ return true;
+ }
+ }
// SROA can't look through these, but they may be free.
return Base::visitExtractValue(I);
diff --git a/llvm/test/Transforms/Inline/simplify-crosscallsite.ll b/llvm/test/Transforms/Inline/simplify-crosscallsite.ll
new file mode 100644
index 0000000000000..112f858aac0e3
--- /dev/null
+++ b/llvm/test/Transforms/Inline/simplify-crosscallsite.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=inline | FileCheck %s
+
+define i32 @callee([2 x i32] %agg) {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: [2 x i32] [[AGG:%.*]]) {
+; CHECK-NEXT: [[V:%.*]] = extractvalue [2 x i32] [[AGG]], 0
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[IS_NULL:.*]], label %[[NON_NULL:.*]]
+; CHECK: [[IS_NULL]]:
+; CHECK-NEXT: ret i32 0
+; CHECK: [[NON_NULL]]:
+; CHECK-NEXT: [[R:%.*]] = call i32 @callee([2 x i32] [[AGG]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %v = extractvalue [2 x i32] %agg, 0
+ %c = icmp eq i32 %v, 0
+ br i1 %c, label %is_null, label %non_null
+
+is_null:
+ ret i32 0
+
+non_null:
+ %r = call i32 @callee([2 x i32] %agg)
+ ret i32 %r
+}
+
+define i32 @caller_simplified(i32 %arg) {
+; CHECK-LABEL: define i32 @caller_simplified(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x i32] [[AGG0]], i32 [[ARG]], 1
+; CHECK-NEXT: ret i32 0
+;
+ %agg0 = insertvalue [2 x i32] poison, i32 0, 0
+ %agg1 = insertvalue [2 x i32] %agg0, i32 %arg, 1
+ %v = call i32 @callee([2 x i32] %agg1)
+ ret i32 %v
+}
+
+define i32 @caller_not_simplified(i32 %arg) {
+; CHECK-LABEL: define i32 @caller_not_simplified(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x i32] poison, i32 1, 0
+; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x i32] [[AGG0]], i32 [[ARG]], 1
+; CHECK-NEXT: [[V:%.*]] = call i32 @callee([2 x i32] [[AGG1]])
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %agg0 = insertvalue [2 x i32] poison, i32 1, 0
+ %agg1 = insertvalue [2 x i32] %agg0, i32 %arg, 1
+ %v = call i32 @callee([2 x i32] %agg1)
+ ret i32 %v
+}
+
+define i32 @caller_not_simplified2(i32 %arg) {
+; CHECK-LABEL: define i32 @caller_not_simplified2(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x i32] poison, i32 0, 1
+; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x i32] [[AGG0]], i32 [[ARG]], 0
+; CHECK-NEXT: [[V:%.*]] = call i32 @callee([2 x i32] [[AGG1]])
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %agg0 = insertvalue [2 x i32] poison, i32 0, 1
+ %agg1 = insertvalue [2 x i32] %agg0, i32 %arg, 0
+ %v = call i32 @callee([2 x i32] %agg1)
+ ret i32 %v
+}
More information about the llvm-commits
mailing list