[llvm] [Transforms][CodeExtraction] bug fix regions with stackrestore (PR #118564)
Abhay Kanhere via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 09:56:47 PST 2024
https://github.com/AbhayKanhere updated https://github.com/llvm/llvm-project/pull/118564
>From 83a292c089629a50c6814b79f3e18e160f124c9f Mon Sep 17 00:00:00 2001
From: Abhay Kanhere <abhay at kanhere.net>
Date: Tue, 3 Dec 2024 16:13:45 -0800
Subject: [PATCH 1/2] [Transforms][CodeExtraction] bug fix regions with
stackrestore Ensure code extraction for outlining to a function does not
create a function with tail-call i.e. using stacksave of caller to restore
stack. Code Extractor can include all other basic blocks without such stack
restore blocks.
symmetric scenario with stacksave with out-of-fn use
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 25 +++++++++
.../outline-inner-region-stacktoocomplex.ll | 51 +++++++++++++++++++
2 files changed, 76 insertions(+)
create mode 100644 llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 6539f924c2edf4..3f54a90e577022 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -627,6 +627,31 @@ bool CodeExtractor::isEligible() const {
return false;
}
}
+ // stacksave as input implies stackrestore in the outlined function
+ // This can confuse prologue epilogue insertion phase
+ // stacksave's uses must not cross outlined function
+ for (BasicBlock *BB : Blocks) {
+ for (Instruction &II : *BB) {
+ if (IntrinsicInst *Intrin = dyn_cast<IntrinsicInst>(&II)) {
+ if (Intrin->getIntrinsicID() == Intrinsic::stacksave) {
+ for (User *U : Intrin->users())
+ if (!definedInRegion(Blocks, U)) {
+ return false; // stack-restore outside outlined region
+ }
+ }
+ }
+ for (auto &OI : II.operands()) {
+ Value *V = OI;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(V)) {
+ if (II->getIntrinsicID() == Intrinsic::stacksave) {
+ if (definedInCaller(Blocks, V)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
return true;
}
diff --git a/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll b/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
new file mode 100644
index 00000000000000..eee3b16c5e0060
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-max-params=1 < %s | FileCheck %s
+
+target datalayout = "E-m:a-p:32:32-i64:64-n32"
+target triple = "powerpc64-ibm-aix7.2.0.0"
+
+define void @foo(i32 %cond) {
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: if.then:
+; CHECK: if.then1:
+; CHECK: if.else:
+; CHECK: call {{.*}}@sink
+; CHECK: if.end:
+; CHECK: if.end2:
+;
+entry:
+ %cond.addr = alloca i32
+ store i32 %cond, ptr %cond.addr
+ %0 = load i32, ptr %cond.addr
+ %stks = tail call ptr @llvm.stacksave.p0()
+ %tobool = icmp ne i32 %0, 0
+ br i1 %tobool, label %if.then, label %if.end2
+
+if.then: ; preds = %entry
+ %1 = load i32, ptr %cond.addr
+ call void @sink(i32 %0)
+ %cmp = icmp sgt i32 %1, 10
+ br i1 %cmp, label %if.then1, label %if.else
+
+if.then1: ; preds = %if.then
+ call void @sideeffect(i32 2)
+ br label %if.end
+
+if.else: ; preds = %if.then
+ call void @sink(i32 0)
+ call void @sideeffect(i32 0)
+ call void @llvm.stackrestore.p0(ptr %stks)
+ br label %if.end
+
+
+if.end: ; preds = %if.else, %if.then1
+ br label %if.end2
+
+if.end2: ; preds = %entry
+ call void @sideeffect(i32 1)
+ ret void
+}
+
+
+declare void @sideeffect(i32)
+
+declare void @sink(i32) cold
>From 28d7301c4481455993f8302ec22065b2b75c78f0 Mon Sep 17 00:00:00 2001
From: Abhay Kanhere <abhay at kanhere.net>
Date: Tue, 10 Dec 2024 13:57:11 -0800
Subject: [PATCH 2/2] Fix and comments addressed
refined fix with only instructions in block
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 35 ++++++++---------
.../outline-inner-region-stacktoocomplex.ll | 38 ++++++++++++++-----
2 files changed, 44 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 3f54a90e577022..e81f706de24b67 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -627,28 +627,23 @@ bool CodeExtractor::isEligible() const {
return false;
}
}
- // stacksave as input implies stackrestore in the outlined function
- // This can confuse prologue epilogue insertion phase
- // stacksave's uses must not cross outlined function
+ // stacksave as input implies stackrestore in the outlined function.
+ // This can confuse prolog epilog insertion phase.
+ // stacksave's uses must not cross outlined function.
for (BasicBlock *BB : Blocks) {
- for (Instruction &II : *BB) {
- if (IntrinsicInst *Intrin = dyn_cast<IntrinsicInst>(&II)) {
- if (Intrin->getIntrinsicID() == Intrinsic::stacksave) {
- for (User *U : Intrin->users())
- if (!definedInRegion(Blocks, U)) {
- return false; // stack-restore outside outlined region
- }
- }
+ for (Instruction &I : *BB) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
+ continue;
+ bool IsSave = II->getIntrinsicID() == Intrinsic::stacksave;
+ bool IsRestore = II->getIntrinsicID() == Intrinsic::stackrestore;
+ if (IsSave && any_of(II->users(), [&Blks = this->Blocks](User *U) {
+ return !definedInRegion(Blks, U);
+ })) {
+ return false;
}
- for (auto &OI : II.operands()) {
- Value *V = OI;
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(V)) {
- if (II->getIntrinsicID() == Intrinsic::stacksave) {
- if (definedInCaller(Blocks, V)) {
- return false;
- }
- }
- }
+ if (IsRestore && !definedInRegion(Blocks, II->getArgOperand(0))) {
+ return false;
}
}
}
diff --git a/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll b/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
index eee3b16c5e0060..2ef827b04e6e2a 100644
--- a/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
+++ b/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
@@ -1,16 +1,36 @@
-; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-max-params=1 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=hotcoldsplit --hotcoldsplit-max-params=1 < %s | FileCheck %s
target datalayout = "E-m:a-p:32:32-i64:64-n32"
-target triple = "powerpc64-ibm-aix7.2.0.0"
define void @foo(i32 %cond) {
-; CHECK-LABEL: define {{.*}}@foo(
-; CHECK: if.then:
-; CHECK: if.then1:
-; CHECK: if.else:
-; CHECK: call {{.*}}@sink
-; CHECK: if.end:
-; CHECK: if.end2:
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: i32 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[COND]], ptr [[COND_ADDR]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+; CHECK-NEXT: [[STKS:%.*]] = tail call ptr @llvm.stacksave.p0()
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[IF_END2:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+; CHECK-NEXT: call void @sink(i32 [[TMP0]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 10
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN1:.*]], label %[[IF_ELSE:.*]]
+; CHECK: [[IF_THEN1]]:
+; CHECK-NEXT: call void @sideeffect(i32 2)
+; CHECK-NEXT: br label %[[IF_END:.*]]
+; CHECK: [[IF_ELSE]]:
+; CHECK-NEXT: call void @sink(i32 0)
+; CHECK-NEXT: call void @sideeffect(i32 0)
+; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STKS]])
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: br label %[[IF_END2]]
+; CHECK: [[IF_END2]]:
+; CHECK-NEXT: call void @sideeffect(i32 1)
+; CHECK-NEXT: ret void
;
entry:
%cond.addr = alloca i32
More information about the llvm-commits
mailing list