[llvm] [Transforms][CodeExtraction] bug fix regions with stackrestore (PR #118564)

Abhay Kanhere via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 10 15:04:18 PST 2024


https://github.com/AbhayKanhere updated https://github.com/llvm/llvm-project/pull/118564

>From 83a292c089629a50c6814b79f3e18e160f124c9f Mon Sep 17 00:00:00 2001
From: Abhay Kanhere <abhay at kanhere.net>
Date: Tue, 3 Dec 2024 16:13:45 -0800
Subject: [PATCH 1/2] [Transforms][CodeExtraction] bug fix regions with
 stackrestore    Ensure code extraction for outlining to a function does not  
 create a function with tail-call i.e. using stacksave of caller   to restore
 stack. Code Extractor can include all other   basic blocks without such stack
 restore blocks.

symmetric scenario with stacksave with out-of-fn use
---
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   | 25 +++++++++
 .../outline-inner-region-stacktoocomplex.ll   | 51 +++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll

diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 6539f924c2edf4..3f54a90e577022 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -627,6 +627,31 @@ bool CodeExtractor::isEligible() const {
         return false;
     }
   }
+  // stacksave as input implies stackrestore in the outlined function
+  // This can confuse prologue epilogue insertion phase
+  // stacksave's uses must not cross outlined function
+  for (BasicBlock *BB : Blocks) {
+    for (Instruction &II : *BB) {
+      if (IntrinsicInst *Intrin = dyn_cast<IntrinsicInst>(&II)) {
+        if (Intrin->getIntrinsicID() == Intrinsic::stacksave) {
+          for (User *U : Intrin->users())
+            if (!definedInRegion(Blocks, U)) {
+              return false; // stack-restore outside outlined region
+            }
+        }
+      }
+      for (auto &OI : II.operands()) {
+        Value *V = OI;
+        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(V)) {
+          if (II->getIntrinsicID() == Intrinsic::stacksave) {
+            if (definedInCaller(Blocks, V)) {
+              return false;
+            }
+          }
+        }
+      }
+    }
+  }
   return true;
 }
 
diff --git a/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll b/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
new file mode 100644
index 00000000000000..eee3b16c5e0060
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-max-params=1 < %s | FileCheck %s
+
+target datalayout = "E-m:a-p:32:32-i64:64-n32"
+target triple = "powerpc64-ibm-aix7.2.0.0"
+
+define void @foo(i32 %cond) {
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK:       if.then:
+; CHECK:       if.then1:
+; CHECK:       if.else:
+; CHECK:       call {{.*}}@sink
+; CHECK:       if.end:
+; CHECK:       if.end2:
+;
+entry:
+  %cond.addr = alloca i32
+  store i32 %cond, ptr %cond.addr
+  %0 = load i32, ptr %cond.addr
+  %stks =  tail call ptr @llvm.stacksave.p0()
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end2
+
+if.then:                                          ; preds = %entry
+  %1 = load i32, ptr %cond.addr
+  call void @sink(i32 %0)
+  %cmp = icmp sgt i32 %1, 10
+  br i1 %cmp, label %if.then1, label %if.else
+
+if.then1:                                         ; preds = %if.then
+  call void @sideeffect(i32 2)
+  br label %if.end
+
+if.else:                                          ; preds = %if.then
+  call void @sink(i32 0)
+  call void @sideeffect(i32 0)
+  call void @llvm.stackrestore.p0(ptr %stks)
+  br label %if.end
+
+
+if.end:                                           ; preds = %if.else, %if.then1
+  br label %if.end2
+
+if.end2:                                          ; preds = %entry
+  call void @sideeffect(i32 1)
+  ret void
+}
+
+
+declare void @sideeffect(i32)
+
+declare void @sink(i32) cold

>From 6fd46350e9e40b20c0be7c2788f6537f2cd23b52 Mon Sep 17 00:00:00 2001
From: Abhay Kanhere <abhay at kanhere.net>
Date: Tue, 10 Dec 2024 13:57:11 -0800
Subject: [PATCH 2/2] Fix and comments addressed

fix indvertant err
---
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   | 35 +++++++++--------
 .../outline-inner-region-stacktoocomplex.ll   | 38 ++++++++++++++-----
 2 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 3f54a90e577022..5186114e68cf24 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -627,27 +627,26 @@ bool CodeExtractor::isEligible() const {
         return false;
     }
   }
-  // stacksave as input implies stackrestore in the outlined function
-  // This can confuse prologue epilogue insertion phase
-  // stacksave's uses must not cross outlined function
+  // stacksave as input implies stackrestore in the outlined function.
+  // This can confuse prologue epilogue insertion phase.
+  // stacksave's uses must not cross outlined function.
   for (BasicBlock *BB : Blocks) {
     for (Instruction &II : *BB) {
-      if (IntrinsicInst *Intrin = dyn_cast<IntrinsicInst>(&II)) {
-        if (Intrin->getIntrinsicID() == Intrinsic::stacksave) {
-          for (User *U : Intrin->users())
-            if (!definedInRegion(Blocks, U)) {
-              return false; // stack-restore outside outlined region
-            }
-        }
+      IntrinsicInst *Intrin = dyn_cast<IntrinsicInst>(&II);
+      if (!Intrin || Intrin->getIntrinsicID() != Intrinsic::stacksave)
+        continue;
+      if (any_of(Intrin->users(), [&Blks = this->Blocks](User *U) {
+            return !definedInRegion(Blks, U);
+          })) {
+        return false;
       }
-      for (auto &OI : II.operands()) {
-        Value *V = OI;
-        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(V)) {
-          if (II->getIntrinsicID() == Intrinsic::stacksave) {
-            if (definedInCaller(Blocks, V)) {
-              return false;
-            }
-          }
+
+      for (Value *V : II.operands()) {
+        IntrinsicInst *IntrinIns = dyn_cast<IntrinsicInst>(V);
+        if (!IntrinIns || IntrinIns->getIntrinsicID() != Intrinsic::stacksave)
+          continue;
+        if (definedInCaller(Blocks, V)) {
+          return false;
         }
       }
     }
diff --git a/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll b/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
index eee3b16c5e0060..2ef827b04e6e2a 100644
--- a/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
+++ b/llvm/test/Transforms/HotColdSplit/outline-inner-region-stacktoocomplex.ll
@@ -1,16 +1,36 @@
-; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-max-params=1 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=hotcoldsplit --hotcoldsplit-max-params=1 < %s | FileCheck %s
 
 target datalayout = "E-m:a-p:32:32-i64:64-n32"
-target triple = "powerpc64-ibm-aix7.2.0.0"
 
 define void @foo(i32 %cond) {
-; CHECK-LABEL: define {{.*}}@foo(
-; CHECK:       if.then:
-; CHECK:       if.then1:
-; CHECK:       if.else:
-; CHECK:       call {{.*}}@sink
-; CHECK:       if.end:
-; CHECK:       if.end2:
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: i32 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+; CHECK-NEXT:    [[STKS:%.*]] = tail call ptr @llvm.stacksave.p0()
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[IF_END2:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+; CHECK-NEXT:    call void @sink(i32 [[TMP0]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 10
+; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN1:.*]], label %[[IF_ELSE:.*]]
+; CHECK:       [[IF_THEN1]]:
+; CHECK-NEXT:    call void @sideeffect(i32 2)
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    call void @sink(i32 0)
+; CHECK-NEXT:    call void @sideeffect(i32 0)
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STKS]])
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    br label %[[IF_END2]]
+; CHECK:       [[IF_END2]]:
+; CHECK-NEXT:    call void @sideeffect(i32 1)
+; CHECK-NEXT:    ret void
 ;
 entry:
   %cond.addr = alloca i32



More information about the llvm-commits mailing list