[llvm] c6ac717 - [Attributor] Allow multiple uses of a casted function pointer
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 18:56:04 PST 2020
Author: Johannes Doerfert
Date: 2020-02-19T20:43:38-06:00
New Revision: c6ac717aa70d3f31c0a4fd6385e8baaa9f3e2724
URL: https://github.com/llvm/llvm-project/commit/c6ac717aa70d3f31c0a4fd6385e8baaa9f3e2724
DIFF: https://github.com/llvm/llvm-project/commit/c6ac717aa70d3f31c0a4fd6385e8baaa9f3e2724.diff
LOG: [Attributor] Allow multiple uses of a casted function pointer
If a function pointer is cast to a different type, the resulting
expression can be a constant. If so, it can be used multiple times, which
cannot be handled by the AbstractCallSite constructor alone. Instead, we
now follow the uses of the cast expression explicitly during the call site
traversal.
Added:
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Transforms/IPO/Attributor.cpp
llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll
llvm/test/Transforms/Attributor/callbacks.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 4da26e2e4973..54a980523d28 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -231,6 +231,8 @@ struct IRPosition {
/// Create a position describing the argument of \p ACS at position \p ArgNo.
static const IRPosition callsite_argument(AbstractCallSite ACS,
unsigned ArgNo) {
+ if (ACS.getNumArgOperands() <= ArgNo)
+ return IRPosition();
int CSArgNo = ACS.getCallArgOperandNo(ArgNo);
if (CSArgNo >= 0)
return IRPosition::callsite_argument(
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 0c806eb8de15..aaa912b7c02d 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -4593,21 +4593,24 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
auto PredForCallSite = [&](AbstractCallSite ACS) {
- // Check if we have an associated argument or not (which can happen for
- // callback calls).
- Value *ArgOp = ACS.getCallArgOperand(getArgNo());
- if (!ArgOp)
+ const IRPosition &ACSArgPos =
+ IRPosition::callsite_argument(ACS, getArgNo());
+ // Check if a corresponding argument was found or if it is not
+ // associated (which can happen for callback calls).
+ if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
return false;
+
// We can only propagate thread independent values through callbacks.
// This is different to direct/indirect call sites because for them we
// know the thread executing the caller and callee is the same. For
// callbacks this is not guaranteed, thus a thread dependent value could
// be different for the caller and callee, making it invalid to propagate.
+ Value &ArgOp = ACSArgPos.getAssociatedValue();
if (ACS.isCallbackCall())
- if (auto *C = dyn_cast<Constant>(ArgOp))
+ if (auto *C = dyn_cast<Constant>(&ArgOp))
if (C->isThreadDependent())
return false;
- return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue);
+ return checkAndUpdate(A, *this, ArgOp, SimplifiedAssociatedValue);
};
bool AllCallSitesKnown;
@@ -7289,13 +7292,23 @@ bool Attributor::checkForAllCallSites(
// If we do not require all call sites we might not see all.
AllCallSitesKnown = RequireAllCallSites;
- for (const Use &U : Fn.uses()) {
+ SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
+ for (unsigned u = 0; u < Uses.size(); ++u) {
+ const Use &U = *Uses[u];
LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in "
<< *U.getUser() << "\n");
if (isAssumedDead(U, QueryingAA, nullptr, /* CheckBBLivenessOnly */ true)) {
LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
continue;
}
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
+ if (CE->isCast() && CE->getType()->isPointerTy() &&
+ CE->getType()->getPointerElementType()->isFunctionTy()) {
+ for (const Use &CEU : CE->uses())
+ Uses.push_back(&CEU);
+ continue;
+ }
+ }
AbstractCallSite ACS(&U);
if (!ACS) {
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
index 0716960b3854..11cd72a41041 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
@@ -30,14 +30,14 @@
; This test is just to verify that we do not crash/assert due to mismatch in
; argument count between the caller and callee.
-define dso_local void @foo(i16 %a) {
+define dso_local i16 @foo(i16 %a) {
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: (i16 [[A:%.*]])
; CHECK-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16)*)(i16 [[A]])
-; CHECK-NEXT: ret void
+; CHECK-NEXT: ret i16 [[CALL]]
;
%call = call i16 bitcast (i16 (i16, i16) * @bar to i16 (i16) *)(i16 %a)
- ret void
+ ret i16 %call
}
define internal i16 @bar(i16 %p1, i16 %p2) {
@@ -73,7 +73,7 @@ define internal i16 @vararg_prop(i16 %p1, ...) {
define internal i16 @vararg_no_prop(i16 %p1, i16 %p2, ...) {
; CHECK-LABEL: define {{[^@]+}}@vararg_no_prop
; CHECK-SAME: (i16 returned [[P1:%.*]], i16 [[P2:%.*]], ...)
-; CHECK-NEXT: ret i16 [[P1]]
+; CHECK-NEXT: ret i16 7
;
ret i16 %p1
}
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll
index e7f8705675bb..a91e8eeee5ee 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll
@@ -4,14 +4,14 @@
; This test is just to verify that we do not crash/assert due to mismatch in
; argument type between the caller and callee.
-define dso_local void @foo(i16 %a) {
+define dso_local i16 @foo(i16 %a) {
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: (i16 [[A:%.*]])
; CHECK-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7)
-; CHECK-NEXT: ret void
+; CHECK-NEXT: ret i16 [[CALL]]
;
%call = call i16 bitcast (i16 (i16, i16) * @bar to i16 (i16, i32) *)(i16 %a, i32 7)
- ret void
+ ret i16 %call
}
define internal i16 @bar(i16 %p1, i16 %p2) {
diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll
index 62df702b6800..fb0984a835a6 100644
--- a/llvm/test/Transforms/Attributor/callbacks.ll
+++ b/llvm/test/Transforms/Attributor/callbacks.ll
@@ -170,5 +170,61 @@ declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture)
declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
+; Test 3
+;
+; Basically test 2 with the casted callback callee used twice.
+
+define void @t3_caller(i32* noalias %a) {
+; CHECK-LABEL: define {{[^@]+}}@t3_caller
+; CHECK-SAME: (i32* noalias nocapture align 256 [[A:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32
+; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: store i32 42, i32* [[B]], align 32
+; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
+; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = alloca i32, align 32
+ %c = alloca i32*, align 64
+ %ptr = alloca i32, align 128
+ %0 = bitcast i32* %b to i8*
+ store i32 42, i32* %b, align 4
+ store i32* %b, i32** %c, align 8
+ call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
+ call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
+ ret void
+}
+
+; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
+; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
+;
+; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls.
+define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
+; CHECK-LABEL: define {{[^@]+}}@t3_callback_callee
+; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
+; CHECK-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %ptr_val = load i32, i32* %ptr, align 8
+ store i32 %ptr_val, i32* %is_not_null
+ %0 = load i32*, i32** %c, align 8
+ tail call void @t3_check(i32* %a, i64 %b, i32* %0)
+ ret void
+}
+
+declare void @t3_check(i32* nocapture align 256, i64, i32* nocapture)
+
+declare !callback !0 void @t3_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
+
!0 = !{!1}
!1 = !{i64 2, i64 -1, i64 -1, i1 true}
More information about the llvm-commits
mailing list