[flang-commits] [flang] 16e9601 - [Flang] Adjust the trampoline size for AArch64 and PPC (#118678)

Mon Jan 27 08:02:23 PST 2025

Author: ssijaric-nv
Date: 2025-01-27T08:02:18-08:00
New Revision: 16e9601e193e026d7f3f27e87f0adb81acf5969b

URL: https://github.com/llvm/llvm-project/commit/16e9601e193e026d7f3f27e87f0adb81acf5969b
DIFF: https://github.com/llvm/llvm-project/commit/16e9601e193e026d7f3f27e87f0adb81acf5969b.diff

LOG: [Flang] Adjust the trampoline size for AArch64 and PPC (#118678)

Set the trampoline size to match the size used by
compiler-rt/lib/builtins/trampoline_setup.c and by the AArch64 and PPC lowering.

Added: 
    

Modified: 
    flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
    flang/test/Fir/boxproc.fir
    llvm/include/llvm/TargetParser/Triple.h
    llvm/lib/TargetParser/Triple.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
index c7c2dcede8aa67..26f4aee21d8bda 100644

--- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
+++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
@@ -270,10 +270,18 @@ class BoxedProcedurePass
             // Create the thunk.
             auto module = embox->getParentOfType<mlir::ModuleOp>();
             FirOpBuilder builder(rewriter, module);
+            const auto triple{fir::getTargetTriple(module)};
             auto loc = embox.getLoc();
             mlir::Type i8Ty = builder.getI8Type();
             mlir::Type i8Ptr = builder.getRefType(i8Ty);
-            mlir::Type buffTy = SequenceType::get({32}, i8Ty);
+            // For AArch64, PPC32 and PPC64, the thunk is populated by a call to
+            // __trampoline_setup, which is defined in
+            // compiler-rt/lib/builtins/trampoline_setup.c and requires the
+            // thunk size greater than 32 bytes.  For RISCV and x86_64, the
+            // thunk setup doesn't go through __trampoline_setup and fits in 32
+            // bytes.
+            fir::SequenceType::Extent thunkSize = triple.getTrampolineSize();
+            mlir::Type buffTy = SequenceType::get({thunkSize}, i8Ty);
             auto buffer = builder.create<AllocaOp>(loc, buffTy);
             mlir::Value closure =
                 builder.createConvert(loc, i8Ptr, embox.getHost());

diff  --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir
index 27d8953236e720..d5d78593dc8a74 100644
--- a/flang/test/Fir/boxproc.fir
+++ b/flang/test/Fir/boxproc.fir
@@ -1,7 +1,11 @@
-// RUN: tco %s | FileCheck %s
+// RUN: %if aarch64-registered-target %{tco --target=aarch64-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-AARCH64 %}
+// RUN: %if x86-registered-target %{tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-X86 %}
+// RUN: %if powerpc-registered-target %{tco --target=powerpc64le-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-PPC %}
 
 // CHECK-LABEL: define void @_QPtest_proc_dummy()
-// CHECK:         %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1
+// CHECK-AARCH64: %[[VAL_3:.*]] = alloca [36 x i8], i64 1, align 1
+// CHECK-X86:     %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1
+// CHECK-PPC:     %[[VAL_3:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1
 // CHECK:         %[[VAL_1:.*]] = alloca { ptr }, i64 1, align 8
 // CHECK:         %[[VAL_0:.*]] = alloca i32, i64 1, align 4
 // CHECK:         %[[VAL_2:.*]] = getelementptr { ptr }, ptr %[[VAL_1]], i32 0, i32 0
@@ -59,7 +63,9 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) {
 }
 
 // CHECK-LABEL: define void @_QPtest_proc_dummy_char()
-// CHECK:         %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1
+// CHECK-AARCH64: %[[VAL_20:.*]] = alloca [36 x i8], i64 1, align 1
+// CHECK-X86:     %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1
+// CHECK-PPC:     %[[VAL_20:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1
 // CHECK:         %[[VAL_2:.*]] = alloca { { ptr, i64 } }, i64 1, align 8
 // CHECK:         %[[VAL_1:.*]] = alloca [10 x i8], i64 1, align 1
 // CHECK:         %[[VAL_0:.*]] = alloca [40 x i8], i64 1, align 1

diff  --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 8097300c6e630c..ed6f48fba788b1 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -498,6 +498,9 @@ class Triple {
     return getArchPointerBitWidth(getArch());
   }
 
+  /// Returns the trampoline size in bytes for this configuration.
+  unsigned getTrampolineSize() const;
+
   /// Test whether the architecture is 64-bit
   ///
   /// Note that this tests for 64-bit pointer width, and nothing else. Note

diff  --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ed58e72089839b..e9e6f130f757cf 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -1711,6 +1711,26 @@ unsigned Triple::getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   llvm_unreachable("Invalid architecture value");
 }
 
+unsigned Triple::getTrampolineSize() const {
+  switch (getArch()) {
+  default:
+    break;
+  case Triple::ppc:
+  case Triple::ppcle:
+    if (isOSLinux())
+      return 40;
+    break;
+  case Triple::ppc64:
+  case Triple::ppc64le:
+    if (isOSLinux())
+      return 48;
+    break;
+  case Triple::aarch64:
+    return 36;
+  }
+  return 32;
+}
+
 bool Triple::isArch64Bit() const {
   return getArchPointerBitWidth(getArch()) == 64;
 }