[llvm] [Inline][Cloning] Drop incompatible attributes from `NewFunc` before `instSimplify` (PR #90489)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Thu May 2 07:29:44 PDT 2024
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/90489
>From 42c7cb69694d274e46bb98d156b9be31b308b62c Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Mon, 29 Apr 2024 17:53:10 +0200
Subject: [PATCH 1/2] Reapply "[Inline][Cloning] Defer simplification after
phi-nodes resolution"
Original commit: a61f9fe31750cee65c726fb51f1b14e31e177258
Multiple 2-stage buildbots were reporting failures. These issues have been
addressed separately.
Fixes: https://github.com/llvm/llvm-project/issues/87534.
---
llvm/lib/Transforms/Utils/CloneFunction.cpp | 92 +++++++------------
.../Inline/inline-deferred-instsimplify.ll | 6 +-
.../Inline/prof-update-sample-alwaysinline.ll | 1 -
.../Transforms/Inline/prof-update-sample.ll | 1 -
4 files changed, 39 insertions(+), 61 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 303a09805a9d84..38cab259f1dcef 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -540,18 +541,13 @@ void PruningFunctionCloner::CloneBlock(
RemapInstruction(NewInst, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
- // If we can simplify this instruction to some other value, simply add
- // a mapping to that value rather than inserting a new instruction into
- // the basic block.
- if (Value *V =
- simplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
- // On the off-chance that this simplifies to an instruction in the old
- // function, map it back into the new function.
- if (NewFunc != OldFunc)
- if (Value *MappedV = VMap.lookup(V))
- V = MappedV;
-
- if (!NewInst->mayHaveSideEffects()) {
+ // Eagerly constant fold the newly cloned instruction. If successful, add
+ // a mapping to the new value. Non-constant operands may be incomplete at
+ // this stage, thus instruction simplification is performed after
+ // processing phi-nodes.
+ if (Value *V = ConstantFoldInstruction(
+ NewInst, BB->getModule()->getDataLayout())) {
+ if (isInstructionTriviallyDead(NewInst)) {
VMap[&*II] = V;
NewInst->eraseFromParent();
continue;
@@ -823,52 +819,34 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
}
- // Make a second pass over the PHINodes now that all of them have been
- // remapped into the new function, simplifying the PHINode and performing any
- // recursive simplifications exposed. This will transparently update the
- // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
- // two PHINodes, the iteration over the old PHIs remains valid, and the
- // mapping will just map us to the new node (which may not even be a PHI
- // node).
+ // As phi-nodes have been now remapped, allow incremental simplification of
+ // newly-cloned instructions.
const DataLayout &DL = NewFunc->getParent()->getDataLayout();
- SmallSetVector<const Value *, 8> Worklist;
- for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
- if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
- Worklist.insert(PHIToResolve[Idx]);
-
- // Note that we must test the size on each iteration, the worklist can grow.
- for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
- const Value *OrigV = Worklist[Idx];
- auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
- if (!I)
- continue;
-
- // Skip over non-intrinsic callsites, we don't want to remove any nodes from
- // the CGSCC.
- CallBase *CB = dyn_cast<CallBase>(I);
- if (CB && CB->getCalledFunction() &&
- !CB->getCalledFunction()->isIntrinsic())
- continue;
-
- // See if this instruction simplifies.
- Value *SimpleV = simplifyInstruction(I, DL);
- if (!SimpleV)
- continue;
-
- // Stash away all the uses of the old instruction so we can check them for
- // recursive simplifications after a RAUW. This is cheaper than checking all
- // uses of To on the recursive step in most cases.
- for (const User *U : OrigV->users())
- Worklist.insert(cast<Instruction>(U));
-
- // Replace the instruction with its simplified value.
- I->replaceAllUsesWith(SimpleV);
-
- // If the original instruction had no side effects, remove it.
- if (isInstructionTriviallyDead(I))
- I->eraseFromParent();
- else
- VMap[OrigV] = I;
+ for (const auto &BB : *OldFunc) {
+ for (const auto &I : BB) {
+ auto *NewI = dyn_cast_or_null<Instruction>(VMap.lookup(&I));
+ if (!NewI)
+ continue;
+
+ // Skip over non-intrinsic callsites, we don't want to remove any nodes
+ // from the CGSCC.
+ CallBase *CB = dyn_cast<CallBase>(NewI);
+ if (CB && CB->getCalledFunction() &&
+ !CB->getCalledFunction()->isIntrinsic())
+ continue;
+
+ if (Value *V = simplifyInstruction(NewI, DL)) {
+ NewI->replaceAllUsesWith(V);
+
+ if (isInstructionTriviallyDead(NewI)) {
+ NewI->eraseFromParent();
+ } else {
+ // Did not erase it? Restore the new instruction into VMap previously
+ // dropped by `ValueIsRAUWd`.
+ VMap[&I] = NewI;
+ }
+ }
+ }
}
// Remap debug intrinsic operands now that all values have been mapped.
diff --git a/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll b/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
index 4a9c576f02719e..f02d03688f0397 100644
--- a/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
+++ b/llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
@@ -38,8 +38,6 @@ store_ptr_in_gvar: ; preds = %entry
check_pointers_are_equal: ; preds = %store_ptr_in_gvar, %entry
%phi = phi ptr [ %ptr, %store_ptr_in_gvar ], [ @other_g_var, %entry ]
-; FIXME: While inlining, the following is miscompiled to i1 false,
-; as %ptr in the phi-node is not taken into account.
%.not1 = icmp eq ptr %phi, %ptr
br i1 %.not1, label %return, label %abort
@@ -64,9 +62,13 @@ define i32 @main() {
; CHECK-NEXT: br label [[CHECK_POINTERS_ARE_EQUAL_I]]
; CHECK: check_pointers_are_equal.i:
; CHECK-NEXT: [[PHI_I:%.*]] = phi ptr [ [[G_VAR]], [[STORE_PTR_IN_GVAR_I]] ], [ @other_g_var, [[TMP0:%.*]] ]
+; CHECK-NEXT: [[DOTNOT1_I:%.*]] = icmp eq ptr [[PHI_I]], [[G_VAR]]
+; CHECK-NEXT: br i1 [[DOTNOT1_I]], label [[CALLEE_EXIT:%.*]], label [[ABORT_I:%.*]]
+; CHECK: abort.i:
; CHECK-NEXT: call void @abort()
; CHECK-NEXT: unreachable
; CHECK: callee.exit:
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr [[G_VAR]])
; CHECK-NEXT: ret i32 0
;
call void @callee(ptr noundef byval(%struct.a) align 8 @g_var)
diff --git a/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll b/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll
index d6b771e2629d2e..8af4d89663a4da 100644
--- a/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll
+++ b/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll
@@ -53,7 +53,6 @@ define void @caller() {
!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
attributes #0 = { alwaysinline }
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
-; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i32 2000}
; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}
diff --git a/llvm/test/Transforms/Inline/prof-update-sample.ll b/llvm/test/Transforms/Inline/prof-update-sample.ll
index 6cdd70e84e0c6d..e09b859b698120 100644
--- a/llvm/test/Transforms/Inline/prof-update-sample.ll
+++ b/llvm/test/Transforms/Inline/prof-update-sample.ll
@@ -52,7 +52,6 @@ define void @caller() {
!17 = !{!"branch_weights", i32 400}
!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
-; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i32 2000}
; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}
>From 1bb929833b18db4a26a4d145d7270597cb5d48ce Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Mon, 29 Apr 2024 17:53:29 +0200
Subject: [PATCH 2/2] [Inline][Cloning] Drop incompatible attributes from
`NewFunc`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Performing `instSimplify` while cloning is unsafe due to incomplete
remapping (as reported in #87534). Ideally, `instSimplify` ought to
reason on the updated newly-cloned function, after returns have been
rewritten and callee entry basic block / call-site have been fixed up.
This is in contrast to `CloneAndPruneIntoFromInst` behaviour, which
is inherently expected to clone basic blocks, with pruning on top of
– if any –, and not actually fixing up returns / CFG, which should be
up to the Inliner. We may solve this by letting `instSimplify` work on
the newly-cloned function, while maintaining old function attributes,
so as to avoid inconsistencies between the yet-to-be-solved return
type, and new function ret type attributes.
---
llvm/lib/Transforms/Utils/CloneFunction.cpp | 12 +++++++
.../Inline/inline-drop-attributes.ll | 32 +++++++++++++++++++
2 files changed, 44 insertions(+)
create mode 100644 llvm/test/Transforms/Inline/inline-drop-attributes.ll
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 38cab259f1dcef..6f6dc63653c173 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -819,6 +820,14 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
}
+ // Drop all incompatible return attributes that cannot be applied to NewFunc
+ // during cloning, so as to allow instruction simplification to reason on the
+ // old state of the function. The original attributes are restored later.
+ AttributeMask IncompatibleAttrs =
+ AttributeFuncs::typeIncompatible(OldFunc->getReturnType());
+ AttributeList Attrs = NewFunc->getAttributes();
+ NewFunc->removeRetAttrs(IncompatibleAttrs);
+
// As phi-nodes have been now remapped, allow incremental simplification of
// newly-cloned instructions.
const DataLayout &DL = NewFunc->getParent()->getDataLayout();
@@ -849,6 +858,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
}
+ // Restore attributes.
+ NewFunc->setAttributes(Attrs);
+
// Remap debug intrinsic operands now that all values have been mapped.
// Doing this now (late) preserves use-before-defs in debug intrinsics. If
// we didn't do this, ValueAsMetadata(use-before-def) operands would be
diff --git a/llvm/test/Transforms/Inline/inline-drop-attributes.ll b/llvm/test/Transforms/Inline/inline-drop-attributes.ll
new file mode 100644
index 00000000000000..9a451f4b8699a7
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-drop-attributes.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=inline -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
+
+define void @callee() {
+; CHECK-LABEL: define void @callee() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_PTR:%.*]] = load ptr, ptr null, align 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[VAL_PTR]], null
+; CHECK-NEXT: [[VAL:%.*]] = load i64, ptr null, align 8
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 undef, i64 [[VAL]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %val_ptr = load ptr, ptr null, align 8
+ %cmp = icmp eq ptr %val_ptr, null
+ %val = load i64, ptr null, align 8
+ %sel = select i1 %cmp, i64 undef, i64 %val
+ ret void
+}
+
+define noundef i1 @caller() {
+; CHECK-LABEL: define noundef i1 @caller() {
+; CHECK-NEXT: [[VAL_PTR_I:%.*]] = load ptr, ptr null, align 8
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq ptr [[VAL_PTR_I]], null
+; CHECK-NEXT: [[VAL_I:%.*]] = load i64, ptr null, align 8
+; CHECK-NEXT: [[SEL_I:%.*]] = select i1 [[CMP_I]], i64 undef, i64 [[VAL_I]]
+; CHECK-NEXT: ret i1 false
+;
+ call void @callee()
+ ret i1 false
+}
More information about the llvm-commits
mailing list