[flang-commits] [flang] [flang] Implement COMPLEX(10) passing and return ABI for X86-64 linux (PR #74094)
via flang-commits
flang-commits at lists.llvm.org
Fri Dec 1 07:50:49 PST 2023
https://github.com/jeanPerier created https://github.com/llvm/llvm-project/pull/74094
COMPLEX(10) passing by value and returning follows C complex passing/returning ABI.
Cover the COMPLEX(10) case (X87 / __Complex long double on X86-64).
Implements System V ABI for AMD64 version 1.0.
The LLVM signatures match the ones generated by clang for the __Complex long double case.
Note that a FIXME is added for the COMPLEX(8) case that is incorrect in a corner case. This will be fixed when dealing with passing derived type by value in BIND(C) context.
>From d9d8df247096c1b995d760aaa828db3b03c1af31 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Fri, 1 Dec 2023 07:35:59 -0800
Subject: [PATCH] [flang] Implement COMPLEX(10) passing and return ABI for
X86-64 linux
COMPLEX(10) passing by value and returning follows C complex
passing/returning ABI.
Cover the COMPLEX(10) case (X87 / __Complex long double on X86-64).
Implements System V ABI for AMD64 version 1.0.
The LLVM signatures match the ones generated by clang for the
__Complex long double case.
Note that a FIXME is added for the COMPLEX(8) case that is incorrect
in a corner case. This will be fixed when dealing with passing
derived type by value in BIND(C) context.
---
flang/lib/Optimizer/CodeGen/Target.cpp | 16 ++++++++++
.../Fir/target-rewrite-complex-10-x86.fir | 32 +++++++++++++++++++
2 files changed, 48 insertions(+)
create mode 100644 flang/test/Fir/target-rewrite-complex-10-x86.fir
diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp
index b136c8f7a9dd98a..112f56e268c3b90 100644
--- a/flang/lib/Optimizer/CodeGen/Target.cpp
+++ b/flang/lib/Optimizer/CodeGen/Target.cpp
@@ -261,9 +261,20 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
// <2 x t> vector of 2 eleTy
marshal.emplace_back(fir::VectorType::get(2, eleTy), AT{});
} else if (sem == &llvm::APFloat::IEEEdouble()) {
+ // FIXME: In case of SSE register exhaustion, the ABI here may be
+ // incorrect since LLVM may pass the real part via register and the
+ // imaginary part via the stack, while the ABI requires both to be in
+ // registers or both in memory. Register occupancy must be analyzed here.
// two distinct double arguments
marshal.emplace_back(eleTy, AT{});
marshal.emplace_back(eleTy, AT{});
+ } else if (sem == &llvm::APFloat::x87DoubleExtended()) {
+ // Use a type that will be translated into LLVM as:
+ // { x86_fp80, x86_fp80 } struct of 2 x86_fp80, byval, align 16
+ marshal.emplace_back(
+ fir::ReferenceType::get(mlir::TupleType::get(
+ eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
+ AT{/*align=*/16, /*byval=*/true});
} else if (sem == &llvm::APFloat::IEEEquad()) {
// Use a type that will be translated into LLVM as:
// { fp128, fp128 } struct of 2 fp128, byval, align 16
@@ -290,6 +301,11 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
mlir::TypeRange{eleTy, eleTy}),
AT{});
+ } else if (sem == &llvm::APFloat::x87DoubleExtended()) {
+ // { x86_fp80, x86_fp80 }
+ marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
+ mlir::TypeRange{eleTy, eleTy}),
+ AT{});
} else if (sem == &llvm::APFloat::IEEEquad()) {
// Use a type that will be translated into LLVM as:
// { fp128, fp128 } struct of 2 fp128, sret, align 16
diff --git a/flang/test/Fir/target-rewrite-complex-10-x86.fir b/flang/test/Fir/target-rewrite-complex-10-x86.fir
new file mode 100644
index 000000000000000..8d7cc9d6ef83dae
--- /dev/null
+++ b/flang/test/Fir/target-rewrite-complex-10-x86.fir
@@ -0,0 +1,32 @@
+// Test COMPLEX(10) passing and returning on X86
+// REQUIRES: x86-registered-target
+// RUN: fir-opt --target-rewrite="target=x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=AMD64
+// RUN: tco -target="x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=AMD64_LLVM
+
+func.func @returncomplex10() -> !fir.complex<10> {
+ %1 = fir.zero_bits !fir.complex<10>
+ return %1 : !fir.complex<10>
+}
+// AMD64-LABEL: func.func @returncomplex10() -> tuple<!fir.real<10>, !fir.real<10>> {
+// AMD64: %[[VAL_0:.*]] = fir.zero_bits !fir.complex<10>
+// AMD64: %[[VAL_1:.*]] = fir.alloca tuple<!fir.real<10>, !fir.real<10>>
+// AMD64: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<tuple<!fir.real<10>, !fir.real<10>>>) -> !fir.ref<!fir.complex<10>>
+// AMD64: fir.store %[[VAL_0]] to %[[VAL_2]] : !fir.ref<!fir.complex<10>>
+// AMD64: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<tuple<!fir.real<10>, !fir.real<10>>>
+// AMD64: return %[[VAL_3]] : tuple<!fir.real<10>, !fir.real<10>>
+
+// AMD64_LLVM: define { x86_fp80, x86_fp80 } @returncomplex10()
+
+func.func @takecomplex10(%z: !fir.complex<10>) {
+ %0 = fir.alloca !fir.complex<10>
+ fir.store %z to %0 : !fir.ref<!fir.complex<10>>
+ return
+}
+// AMD64-LABEL: func.func @takecomplex10(
+// AMD64-SAME: %[[VAL_0:.*]]: !fir.ref<tuple<!fir.real<10>, !fir.real<10>>> {llvm.align = 16 : i32, llvm.byval = tuple<!fir.real<10>, !fir.real<10>>}) {
+// AMD64: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<tuple<!fir.real<10>, !fir.real<10>>>) -> !fir.ref<!fir.complex<10>>
+// AMD64: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.complex<10>>
+// AMD64: %[[VAL_3:.*]] = fir.alloca !fir.complex<10>
+// AMD64: fir.store %[[VAL_2]] to %[[VAL_3]] : !fir.ref<!fir.complex<10>>
+
+// AMD64_LLVM: define void @takecomplex10(ptr byval({ x86_fp80, x86_fp80 }) align 16 %0)
More information about the flang-commits
mailing list