[flang-commits] [flang] [Flang] LoongArch64 support for BIND(C) derived types. (PR #117108)

Wed Nov 20 19:45:47 PST 2024

https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/117108

This patch supports both the passing and returning of BIND(C) type parameters.

Reference ABI:
https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#subroutine-calling-sequence

>From 32e04b6538486006c98c6b805b1057110c3a2c1a Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Wed, 20 Nov 2024 17:30:43 +0800
Subject: [PATCH] [Flang] LoongArch64 support for BIND(C) derived types.

This patch supports both the passing and returning of BIND(C) type parameters.

Reference ABI:
https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#subroutine-calling-sequence
---
 flang/lib/Optimizer/CodeGen/Target.cpp        | 308 ++++++++++++++++++
 .../Fir/struct-passing-loongarch64-byreg.fir  | 232 +++++++++++++
 ...uct-passing-return-loongarch64-bystack.fir |  80 +++++
 .../Fir/struct-return-loongarch64-byreg.fir   | 200 ++++++++++++
 4 files changed, 820 insertions(+)
 create mode 100644 flang/test/Fir/struct-passing-loongarch64-byreg.fir
 create mode 100644 flang/test/Fir/struct-passing-return-loongarch64-bystack.fir
 create mode 100644 flang/test/Fir/struct-return-loongarch64-byreg.fir

diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp
index 9ec055b1aecabb..90ce51552c687f 100644
--- a/flang/lib/Optimizer/CodeGen/Target.cpp
+++ b/flang/lib/Optimizer/CodeGen/Target.cpp
@@ -1081,6 +1081,9 @@ struct TargetLoongArch64 : public GenericTarget<TargetLoongArch64> {
   using GenericTarget::GenericTarget;
 
   static constexpr int defaultWidth = 64;
+  static constexpr int GRLen = defaultWidth; /* GAR width in bits (8 bytes) */
+  static constexpr int GRLenInChar = GRLen / 8; /* GAR width in bytes */
+  static constexpr int FRLen = defaultWidth; /* FAR width in bits (8 bytes) */
 
   CodeGenSpecifics::Marshalling
   complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
@@ -1151,6 +1154,311 @@ struct TargetLoongArch64 : public GenericTarget<TargetLoongArch64> {
 
     return GenericTarget::integerArgumentType(loc, argTy);
   }
+
+  /// Flatten `type` into the scalar leaves examined by the LoongArch64
+  /// aggregate-passing rules: the result holds only `IntegerType` and
+  /// `FloatType`. CHARACTER contributes one integer per character; other
+  /// pass-by-reference conforming types become one GRLen-wide integer.
+  std::vector<mlir::Type> flattenTypeList(mlir::Location loc,
+                                          const mlir::Type type) const {
+    std::vector<mlir::Type> flatTypes;
+
+    // Append `count` copies of the flattened form of `eleTy`.
+    auto appendRepeated = [&](mlir::Type eleTy, std::size_t count) {
+      if (mlir::isa<mlir::IntegerType, mlir::FloatType>(eleTy)) {
+        std::fill_n(std::back_inserter(flatTypes), count, eleTy);
+        return;
+      }
+      auto subTypeList = flattenTypeList(loc, eleTy);
+      for (std::size_t i = 0; i < count; ++i)
+        llvm::copy(subTypeList, std::back_inserter(flatTypes));
+    };
+
+    llvm::TypeSwitch<mlir::Type>(type)
+        .template Case<mlir::IntegerType>([&](mlir::IntegerType intTy) {
+          if (intTy.getWidth() != 0)
+            flatTypes.push_back(intTy);
+        })
+        .template Case<mlir::FloatType>([&](mlir::FloatType floatTy) {
+          if (floatTy.getWidth() != 0)
+            flatTypes.push_back(floatTy);
+        })
+        .template Case<mlir::ComplexType>([&](mlir::ComplexType cmplx) {
+          const auto *sem = &floatToSemantics(kindMap, cmplx.getElementType());
+          if (sem == &llvm::APFloat::IEEEsingle() ||
+              sem == &llvm::APFloat::IEEEdouble() ||
+              sem == &llvm::APFloat::IEEEquad())
+            std::fill_n(std::back_inserter(flatTypes), 2,
+                        cmplx.getElementType());
+          else
+            TODO(loc, "unsupported complx type(not IEEEsingle, IEEEdouble, "
+                      "IEEEquad) as a structure component for BIND(C), "
+                      "VALUE derived type argument and type return");
+        })
+        .template Case<fir::LogicalType>([&](fir::LogicalType logicalTy) {
+          const auto width = kindMap.getLogicalBitsize(logicalTy.getFKind());
+          if (width != 0)
+            flatTypes.push_back(
+                mlir::IntegerType::get(type.getContext(), width));
+        })
+        .template Case<fir::CharacterType>([&](fir::CharacterType charTy) {
+          // One integer per character, sized by the character kind.
+          if (charTy.hasConstantLen()) {
+            const auto width = kindMap.getCharacterBitsize(charTy.getFKind());
+            std::fill_n(std::back_inserter(flatTypes), charTy.getLen(),
+                        mlir::IntegerType::get(type.getContext(), width));
+          } else
+            TODO(loc, "unsupported dynamic length character type as a "
+                      "structure component for BIND(C), "
+                      "VALUE derived type argument and type return");
+        })
+        .template Case<fir::SequenceType>([&](fir::SequenceType seqTy) {
+          if (!seqTy.hasDynamicExtents())
+            appendRepeated(seqTy.getEleTy(), seqTy.getConstantArraySize());
+          else
+            TODO(loc, "unsupported dynamic extent sequence type as a structure "
+                      "component for BIND(C), "
+                      "VALUE derived type argument and type return");
+        })
+        .template Case<fir::RecordType>([&](fir::RecordType recTy) {
+          for (auto component : recTy.getTypeList())
+            llvm::copy(flattenTypeList(loc, component.second),
+                       std::back_inserter(flatTypes));
+        })
+        .template Case<fir::VectorType>([&](fir::VectorType vecTy) {
+          appendRepeated(vecTy.getEleTy(), vecTy.getLen());
+        })
+        .Default([&](mlir::Type ty) {
+          if (fir::conformsWithPassByRef(ty))
+            flatTypes.push_back(
+                mlir::IntegerType::get(type.getContext(), GRLen));
+          else
+            TODO(loc, "unsupported component type for BIND(C), VALUE derived "
+                      "type argument and type return");
+        });
+
+    return flatTypes;
+  }
+
+  /// Determine if a struct is eligible to be passed in FARs (and GARs), i.e.
+  /// when flattened it contains a single fp value, fp+fp, or int+fp of
+  /// appropriate size. The flattened fields are returned in Field1Ty/Field2Ty.
+  bool detectFARsEligibleStruct(mlir::Location loc, fir::RecordType recTy,
+                                mlir::Type &Field1Ty,
+                                mlir::Type &Field2Ty) const {
+    // Out-params are cleared first; they may be partially set on failure.
+    Field1Ty = Field2Ty = nullptr;
+    auto flatTypes = flattenTypeList(loc, recTy);
+    size_t flatSize = flatTypes.size();
+
+    // Cannot be eligible if the number of flattened types is equal to 0 or
+    // greater than 2.
+    if (flatSize == 0 || flatSize > 2)
+      return false;
+
+    bool isFirstAvaliableFloat = false;
+
+    assert((mlir::isa<mlir::IntegerType, mlir::FloatType>(flatTypes[0])) &&
+           "Type must be int or float after flattening");
+    if (auto floatTy = mlir::dyn_cast<mlir::FloatType>(flatTypes[0])) {
+      auto Size = floatTy.getWidth();
+      // Can't be eligible if larger than the FP registers. Half precision isn't
+      // currently supported on LoongArch and the ABI hasn't been confirmed, so
+      // default to the integer ABI in that case.
+      if (Size > FRLen || Size < 32)
+        return false;
+      isFirstAvaliableFloat = true;
+      Field1Ty = floatTy;
+    } else if (auto intTy = mlir::dyn_cast<mlir::IntegerType>(flatTypes[0])) {
+      if (intTy.getWidth() > GRLen)
+        return false;
+      Field1Ty = intTy;
+    }
+
+    // flatTypes has two elements: validate the second one as well.
+    if (flatSize == 2) {
+      assert((mlir::isa<mlir::IntegerType, mlir::FloatType>(flatTypes[1])) &&
+             "Type must be integer or float after flattening");
+      if (auto floatTy = mlir::dyn_cast<mlir::FloatType>(flatTypes[1])) {
+        auto Size = floatTy.getWidth();
+        if (Size > FRLen || Size < 32)
+          return false;
+        Field2Ty = floatTy;
+        return true;
+      } else if (auto intTy = mlir::dyn_cast<mlir::IntegerType>(flatTypes[1])) {
+        // Can't be eligible if an integer type was already found (int+int pairs
+        // are not eligible).
+        if (!isFirstAvaliableFloat)
+          return false;
+        if (intTy.getWidth() > GRLen)
+          return false;
+        Field2Ty = intTy;
+        return true;
+      }
+    }
+
+    // flatTypes has a single element: eligible only if it is a usable float.
+    return isFirstAvaliableFloat;
+  }
+  /// Charge `type` against the remaining GAR/FAR budgets; true if both >= 0.
+  bool checkTypehasEnoughReg(mlir::Location loc, int &GARsLeft, int &FARsLeft,
+                             const mlir::Type type) const {
+    if (type == nullptr)
+      return true;
+    // Floats of 32/64 bits use a FAR; everything else is charged to GARs.
+    llvm::TypeSwitch<mlir::Type>(type)
+        .template Case<mlir::IntegerType>([&](mlir::IntegerType intTy) {
+          const auto width = intTy.getWidth();
+          assert(width <= 128 &&
+                 "integer type with width more than 128 bits is unexpected");
+          if (width == 0)
+            return;
+          if (width <= GRLen)
+            --GARsLeft;
+          else if (width <= 2 * GRLen)
+            GARsLeft = GARsLeft - 2;
+        })
+        .template Case<mlir::FloatType>([&](mlir::FloatType floatTy) {
+          const auto width = floatTy.getWidth();
+          assert(width <= 128 &&
+                 "float type with width more than 128 bits is unexpected");
+          if (width == 0)
+            return;
+          if (width == 32 || width == 64)
+            --FARsLeft;
+          else if (width <= GRLen)
+            --GARsLeft;
+          else if (width <= 2 * GRLen)
+            GARsLeft = GARsLeft - 2;
+        })
+        .Default([&](mlir::Type ty) {
+          if (fir::conformsWithPassByRef(ty))
+            --GARsLeft; // Pointers.
+          else
+            TODO(loc, "unsupported component type for BIND(C), VALUE derived "
+                      "type argument and type return");
+        });
+
+    return GARsLeft >= 0 && FARsLeft >= 0;
+  }
+  /// Return true if Field1Ty/Field2Ty still fit after `previousArguments`.
+  bool hasEnoughRegisters(mlir::Location loc, int GARsLeft, int FARsLeft,
+                          const Marshalling &previousArguments,
+                          const mlir::Type &Field1Ty,
+                          const mlir::Type &Field2Ty) const {
+    // First charge the registers consumed by the earlier arguments.
+    for (auto typeAndAttr : previousArguments) {
+      const auto &attr = std::get<Attributes>(typeAndAttr);
+      if (attr.isByVal()) {
+        // Previous argument passed on the stack, and its address is passed in
+        // GAR.
+        --GARsLeft;
+        continue;
+      }
+
+      // Previous aggregate arguments were marshalled into simpler arguments.
+      const auto &type = std::get<mlir::Type>(typeAndAttr);
+      auto flatTypes = flattenTypeList(loc, type);
+
+      for (auto &flatTy : flatTypes) {
+        if (!checkTypehasEnoughReg(loc, GARsLeft, FARsLeft, flatTy))
+          return false;
+      }
+    }
+    // Then charge the fields of the struct being classified.
+    if (!checkTypehasEnoughReg(loc, GARsLeft, FARsLeft, Field1Ty))
+      return false;
+    if (!checkTypehasEnoughReg(loc, GARsLeft, FARsLeft, Field2Ty))
+      return false;
+    return true;
+  }
+
+  /// Classify `recTy` per the LoongArch64 subroutine calling sequence ABI:
+  /// https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#subroutine-calling-sequence
+  CodeGenSpecifics::Marshalling
+  classifyStruct(mlir::Location loc, fir::RecordType recTy, int GARsLeft,
+                 int FARsLeft, bool isResult,
+                 const Marshalling &previousArguments) const {
+    CodeGenSpecifics::Marshalling marshal;
+
+    auto [recSize, recAlign] = fir::getTypeSizeAndAlignmentOrCrash(
+        loc, recTy, getDataLayout(), kindMap);
+    auto context = recTy.getContext();
+
+    if (recSize == 0) {
+      TODO(loc, "unsupported empty struct type for BIND(C), "
+                "VALUE derived type argument and type return");
+    }
+    // Larger than two GARs: pass by reference (byval, or sret for a result).
+    if (recSize > 2 * GRLenInChar) {
+      marshal.emplace_back(
+          fir::ReferenceType::get(recTy),
+          AT{recAlign, /*byval=*/!isResult, /*sret=*/isResult});
+      return marshal;
+    }
+
+    // Pass by FARs (and GARs) if eligible and enough registers are left.
+    mlir::Type Field1Ty = nullptr, Field2Ty = nullptr;
+    if (detectFARsEligibleStruct(loc, recTy, Field1Ty, Field2Ty)) {
+      if (hasEnoughRegisters(loc, GARsLeft, FARsLeft, previousArguments,
+                             Field1Ty, Field2Ty)) {
+        if (!isResult) {
+          if (Field1Ty)
+            marshal.emplace_back(Field1Ty, AT{});
+          if (Field2Ty)
+            marshal.emplace_back(Field2Ty, AT{});
+        } else {
+          // Field1Ty is always preferred over Field2Ty for assignment, so there
+          // will never be a case where Field1Ty == nullptr and Field2Ty !=
+          // nullptr.
+          if (Field1Ty && !Field2Ty)
+            marshal.emplace_back(Field1Ty, AT{});
+          else if (Field1Ty && Field2Ty)
+            marshal.emplace_back(
+                mlir::TupleType::get(context,
+                                     mlir::TypeRange{Field1Ty, Field2Ty}),
+                AT{/*alignment=*/0, /*byval=*/true});
+        }
+        return marshal;
+      }
+    }
+    // Integer fallback: i64, or i128 if 16-byte aligned, else two i64.
+    if (recSize <= GRLenInChar) {
+      marshal.emplace_back(mlir::IntegerType::get(context, GRLen), AT{});
+      return marshal;
+    }
+
+    if (recAlign == 2 * GRLenInChar) {
+      marshal.emplace_back(mlir::IntegerType::get(context, 2 * GRLen), AT{});
+      return marshal;
+    }
+
+    // recSize > GRLenInChar && recSize <= 2 * GRLenInChar
+    marshal.emplace_back(
+        fir::SequenceType::get({2}, mlir::IntegerType::get(context, GRLen)),
+        AT{});
+    return marshal;
+  }
+
+  /// Marshal a derived type passed by value like a C struct.
+  CodeGenSpecifics::Marshalling
+  structArgumentType(mlir::Location loc, fir::RecordType recTy,
+                     const Marshalling &previousArguments) const override {
+    int GARsLeft = 8; // general argument register budget
+    int FARsLeft = FRLen ? 8 : 0; // no FARs are available when FRLen is 0
+
+    return classifyStruct(loc, recTy, GARsLeft, FARsLeft, /*isResult=*/false,
+                          previousArguments);
+  }
+  /// Marshal a derived type returned by value like a C struct.
+  CodeGenSpecifics::Marshalling
+  structReturnType(mlir::Location loc, fir::RecordType recTy) const override {
+    // Same rules as for arguments, but only two GARs and two FARs are usable.
+    int GARsLeft = 2;
+    int FARsLeft = FRLen ? 2 : 0;
+    return classifyStruct(loc, recTy, GARsLeft, FARsLeft, /*isResult=*/true,
+                          {});
+  }
 };
 } // namespace
 
diff --git a/flang/test/Fir/struct-passing-loongarch64-byreg.fir b/flang/test/Fir/struct-passing-loongarch64-byreg.fir
new file mode 100644
index 00000000000000..576ea6459e17a0
--- /dev/null
+++ b/flang/test/Fir/struct-passing-loongarch64-byreg.fir
@@ -0,0 +1,232 @@
+/// Test LoongArch64 ABI rewrite of struct passed by value (BIND(C), VALUE derived types).
+/// This test covers cases where the struct can be passed in registers.
+/// Test cases can be roughly divided into two categories:
+///  - struct with a single intrinsic component;
+///  - struct with more than one field;
+/// Since the argument marshalling logic is largely the same within each category,
+/// only the first example in each category checks the entire invocation process,
+/// while the other examples only check the signatures.
+
+// REQUIRES: loongarch-registered-target
+// RUN: fir-opt --split-input-file --target-rewrite="target=loongarch64-unknown-linux-gnu" %s | FileCheck %s
+
+
+/// *********************** Struct with a single intrinsic component *********************** ///
+
+!ty_i16   = !fir.type<ti16{i:i16}>
+!ty_i32   = !fir.type<ti32{i:i32}>
+!ty_i64   = !fir.type<ti64{i:i64}>
+!ty_i128  = !fir.type<ti128{i:i128}>
+!ty_f16   = !fir.type<tf16{i:f16}>
+!ty_f32   = !fir.type<tf32{i:f32}>
+!ty_f64   = !fir.type<tf64{i:f64}>
+!ty_f128  = !fir.type<tf128{i:f128}>
+!ty_bf16  = !fir.type<tbf16{i:bf16}>
+!ty_char1 = !fir.type<tchar1{i:!fir.char<1>}>
+!ty_char2 = !fir.type<tchar2{i:!fir.char<2>}>
+!ty_log1  = !fir.type<tlog1{i:!fir.logical<1>}>
+!ty_log2  = !fir.type<tlog2{i:!fir.logical<2>}>
+!ty_log4  = !fir.type<tlog4{i:!fir.logical<4>}>
+!ty_log8  = !fir.type<tlog8{i:!fir.logical<8>}>
+!ty_log16 = !fir.type<tlog16{i:!fir.logical<16>}>
+!ty_cmplx_f32  = !fir.type<tcmplx_f32{i:complex<f32>}>
+!ty_cmplx_f64  = !fir.type<tcmplx_f64{i:complex<f64>}>
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", llvm.target_triple = "loongarch64-unknown-linux-gnu"} {
+
+// CHECK-LABEL: func.func private @test_func_i16(i64)
+func.func private @test_func_i16(%arg0: !ty_i16)
+// CHECK-LABEL: func.func @test_call_i16(
+// CHECK-SAME:    %[[ARG0:.*]]: !fir.ref<!fir.type<ti16{i:i16}>>) {
+func.func @test_call_i16(%arg0: !fir.ref<!ty_i16>) {
+  // CHECK: %[[IN:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.type<ti16{i:i16}>>
+  // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr
+  // CHECK: %[[ARR:.*]] = fir.alloca i64
+  // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref<i64>) -> !fir.ref<!fir.type<ti16{i:i16}>>
+  // CHECK: fir.store %[[IN]] to %[[CVT]] : !fir.ref<!fir.type<ti16{i:i16}>>
+  // CHECK: %[[LD:.*]] = fir.load %[[ARR]] : !fir.ref<i64>
+  %in = fir.load %arg0 : !fir.ref<!ty_i16>
+  // CHECK: fir.call @test_func_i16(%[[LD]]) : (i64) -> ()
+  // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr
+  fir.call @test_func_i16(%in) : (!ty_i16) -> ()
+  // CHECK: return
+  return
+}
+
+// CHECK-LABEL: func.func private @test_func_i32(i64)
+func.func private @test_func_i32(%arg0: !ty_i32)
+
+// CHECK-LABEL: func.func private @test_func_i64(i64)
+func.func private @test_func_i64(%arg0: !ty_i64)
+
+// CHECK-LABEL: func.func private @test_func_i128(i128)
+func.func private @test_func_i128(%arg0: !ty_i128)
+
+// CHECK-LABEL: func.func private @test_func_f16(i64)
+func.func private @test_func_f16(%arg0: !ty_f16)
+
+// CHECK-LABEL: func.func private @test_func_f32(f32)
+func.func private @test_func_f32(%arg0: !ty_f32)
+
+// CHECK-LABEL: func.func private @test_func_f64(f64)
+func.func private @test_func_f64(%arg0: !ty_f64)
+
+// CHECK-LABEL: func.func private @test_func_f128(i128)
+func.func private @test_func_f128(%arg0: !ty_f128)
+
+// CHECK-LABEL: func.func private @test_func_bf16(i64)
+func.func private @test_func_bf16(%arg0: !ty_bf16)
+
+// CHECK-LABEL: func.func private @test_func_char1(i64)
+func.func private @test_func_char1(%arg0: !ty_char1)
+
+// CHECK-LABEL: func.func private @test_func_char2(i64)
+func.func private @test_func_char2(%arg0: !ty_char2)
+
+// CHECK-LABEL: func.func private @test_func_log1(i64)
+func.func private @test_func_log1(%arg0: !ty_log1)
+
+// CHECK-LABEL: func.func private @test_func_log2(i64)
+func.func private @test_func_log2(%arg0: !ty_log2)
+
+// CHECK-LABEL: func.func private @test_func_log4(i64)
+func.func private @test_func_log4(%arg0: !ty_log4)
+
+// CHECK-LABEL: func.func private @test_func_log8(i64)
+func.func private @test_func_log8(%arg0: !ty_log8)
+
+// CHECK-LABEL: func.func private @test_func_log16(i128)
+func.func private @test_func_log16(%arg0: !ty_log16)
+
+// CHECK-LABEL: func.func private @test_func_cmplx_f32(f32, f32)
+func.func private @test_func_cmplx_f32(%arg0: !ty_cmplx_f32)
+
+// CHECK-LABEL: func.func private @test_func_cmplx_f64(f64, f64)
+func.func private @test_func_cmplx_f64(%arg0: !ty_cmplx_f64)
+}
+
+
+/// *************************** Struct with more than one field **************************** ///
+
+// -----
+
+!ty_i32_f32 = !fir.type<ti32_f32{i:i32,j:f32}>
+!ty_i32_f64 = !fir.type<ti32_f64{i:i32,j:f64}>
+!ty_i64_f32 = !fir.type<ti64_f32{i:i64,j:f32}>
+!ty_i64_f64 = !fir.type<ti64_f64{i:i64,j:f64}>
+!ty_f64_i64 = !fir.type<tf64_i64{i:f64,j:i64}>
+!ty_f16_f16 = !fir.type<tf16_f16{i:f16,j:f16}>
+!ty_f32_f32 = !fir.type<tf32_f32{i:f32,j:f32}>
+!ty_f64_f64 = !fir.type<tf64_f64{i:f64,j:f64}>
+!ty_f32_i32_i32 = !fir.type<tf32_i32_i32{i:f32,j:i32,k:i32}>
+!ty_f32_f32_i32 = !fir.type<tf32_f32_i32{i:f32,j:f32,k:i32}>
+!ty_f32_f32_f32 = !fir.type<tf32_f32_f32{i:f32,j:f32,k:f32}>
+
+!ty_i8_a8  = !fir.type<ti8_a8{i:!fir.array<8xi8>}>
+!ty_i8_a16 = !fir.type<ti8_a16{i:!fir.array<16xi8>}>
+!ty_f32_a2 = !fir.type<tf32_a2{i:!fir.array<2xf32>}>
+!ty_f64_a2 = !fir.type<tf64_a2{i:!fir.array<2xf64>}>
+!ty_nested_i32_f32 = !fir.type<t11{i:!ty_i32_f32}>
+!ty_nested_i8_a8_i32 = !fir.type<t12{i:!ty_i8_a8, j:i32}>
+!ty_char1_a8 = !fir.type<t_char_a8{i:!fir.array<8x!fir.char<1>>}>
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", llvm.target_triple = "loongarch64-unknown-linux-gnu"} {
+
+// CHECK-LABEL: func.func private @test_func_i32_f32(i32, f32)
+func.func private @test_func_i32_f32(%arg0: !ty_i32_f32)
+// CHECK-LABEL: func.func @test_call_i32_f32(
+// CHECK-SAME:    %[[ARG0:.*]]: !fir.ref<!fir.type<ti32_f32{i:i32,j:f32}>>) {
+func.func @test_call_i32_f32(%arg0: !fir.ref<!ty_i32_f32>) {
+  // CHECK: %[[IN:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.type<ti32_f32{i:i32,j:f32}>>
+  // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr
+  // CHECK: %[[ARR:.*]] = fir.alloca tuple<i32, f32>
+  // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref<tuple<i32, f32>>) -> !fir.ref<!fir.type<ti32_f32{i:i32,j:f32}>>
+  // CHECK: fir.store %[[IN]] to %[[CVT]] : !fir.ref<!fir.type<ti32_f32{i:i32,j:f32}>>
+  // CHECK: %[[LD:.*]] = fir.load %[[ARR]] : !fir.ref<tuple<i32, f32>>
+  // CHECK: %[[VAL_0:.*]] = fir.extract_value %[[LD]], [0 : i32] : (tuple<i32, f32>) -> i32
+  // CHECK: %[[VAL_1:.*]] = fir.extract_value %[[LD]], [1 : i32] : (tuple<i32, f32>) -> f32
+  %in = fir.load %arg0 : !fir.ref<!ty_i32_f32>
+  // CHECK: fir.call @test_func_i32_f32(%[[VAL_0]], %[[VAL_1]]) : (i32, f32) -> ()
+  // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr
+  fir.call @test_func_i32_f32(%in) : (!ty_i32_f32) -> ()
+  // CHECK: return
+  return
+}
+
+// CHECK-LABEL: func.func private @test_func_i32_f64(i32, f64)
+func.func private @test_func_i32_f64(%arg0: !ty_i32_f64)
+
+// CHECK-LABEL: func.func private @test_func_i64_f32(i64, f32)
+func.func private @test_func_i64_f32(%arg0: !ty_i64_f32)
+
+// CHECK-LABEL: func.func private @test_func_i64_f64(i64, f64)
+func.func private @test_func_i64_f64(%arg0: !ty_i64_f64)
+
+// CHECK-LABEL: func.func private @test_func_f64_i64(f64, i64)
+func.func private @test_func_f64_i64(%arg0: !ty_f64_i64)
+
+// CHECK-LABEL: func.func private @test_func_f16_f16(i64)
+func.func private @test_func_f16_f16(%arg0: !ty_f16_f16)
+
+// CHECK-LABEL: func.func private @test_func_f32_f32(f32, f32)
+func.func private @test_func_f32_f32(%arg0: !ty_f32_f32)
+
+// CHECK-LABEL: func.func private @test_func_f64_f64(f64, f64)
+func.func private @test_func_f64_f64(%arg0: !ty_f64_f64)
+
+// CHECK-LABEL: func.func private @test_func_f32_i32_i32(!fir.array<2xi64>)
+func.func private @test_func_f32_i32_i32(%arg0: !ty_f32_i32_i32)
+
+// CHECK-LABEL: func.func private @test_func_f32_f32_i32(!fir.array<2xi64>)
+func.func private @test_func_f32_f32_i32(%arg0: !ty_f32_f32_i32)
+
+// CHECK-LABEL: func.func private @test_func_f32_f32_f32(!fir.array<2xi64>)
+func.func private @test_func_f32_f32_f32(%arg0: !ty_f32_f32_f32)
+
+// CHECK-LABEL: func.func private @test_func_i8_a8(i64)
+func.func private @test_func_i8_a8(%arg0: !ty_i8_a8)
+
+// CHECK-LABEL: func.func private @test_func_i8_a16(!fir.array<2xi64>)
+func.func private @test_func_i8_a16(%arg0: !ty_i8_a16)
+
+// CHECK-LABEL: func.func private @test_func_f32_a2(f32, f32)
+func.func private @test_func_f32_a2(%arg0: !ty_f32_a2)
+
+// CHECK-LABEL: func.func private @test_func_f64_a2(f64, f64)
+func.func private @test_func_f64_a2(%arg0: !ty_f64_a2)
+
+// CHECK-LABEL: func.func private @test_func_nested_i32_f32(i32, f32)
+func.func private @test_func_nested_i32_f32(%arg0: !ty_nested_i32_f32)
+
+// CHECK-LABEL: func.func private @test_func_nested_i8_a8_i32(!fir.array<2xi64>)
+func.func private @test_func_nested_i8_a8_i32(%arg0: !ty_nested_i8_a8_i32)
+
+
+// CHECK: func.func private @not_enough_int_reg_1(i32, i32, i32, i32, i32, i32, i32, i32, i64)
+func.func private @not_enough_int_reg_1(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32,
+                                        %arg5: i32, %arg6: i32, %arg7: i32, %arg8: !ty_i32_f32)
+
+// CHECK: func.func private @not_enough_int_reg_1b(!fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, i64)
+func.func private @not_enough_int_reg_1b(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>, %arg2: !fir.ref<i32>, %arg3: !fir.ref<i32>, %arg4: !fir.ref<i32>,
+                                         %arg5: !fir.ref<i32>, %arg6: !fir.ref<i32>, %arg7: !fir.ref<i32>, %arg8: !ty_i32_f32)
+
+// CHECK: func.func private @not_enough_int_reg_2(i32, i32, i32, i32, i32, i32, i32, i32, !fir.array<2xi64>)
+func.func private @not_enough_int_reg_2(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32,
+                                        %arg5: i32, %arg6: i32, %arg7: i32, %arg8: !ty_i64_f64)
+
+// CHECK: func.func private @not_enough_fp_reg_1(f32, f32, f32, f32, f32, f32, f32, f32, i64)
+func.func private @not_enough_fp_reg_1(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32,
+                                        %arg5: f32, %arg6: f32, %arg7: f32, %arg8: !ty_i32_f32)
+
+// CHECK: func.func private @not_enough_fp_reg_1b(!fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>, !fir.ref<f32>, i64)
+func.func private @not_enough_fp_reg_1b(%arg0: !fir.ref<f32>, %arg1: !fir.ref<f32>, %arg2: !fir.ref<f32>, %arg3: !fir.ref<f32>, %arg4: !fir.ref<f32>,
+                                        %arg5: !fir.ref<f32>, %arg6: !fir.ref<f32>, %arg7: !fir.ref<f32>, %arg8: !ty_i32_f32)
+
+// CHECK: func.func private @not_enough_fp_reg_2(f32, f32, f32, f32, f32, f32, f32, f32, !fir.array<2xi64>)
+func.func private @not_enough_fp_reg_2(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32,
+                                        %arg5: f32, %arg6: f32, %arg7: f32, %arg8: !ty_i64_f64)
+
+// CHECK: func.func private @char_not_enough_int_reg(i32, i32, i32, i32, i32, i32, i32, i32, i64)
+func.func private @char_not_enough_int_reg(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32,
+                                        %arg5: i32, %arg6: i32, %arg7: i32, %arg8: !ty_char1_a8)
+}
diff --git a/flang/test/Fir/struct-passing-return-loongarch64-bystack.fir b/flang/test/Fir/struct-passing-return-loongarch64-bystack.fir
new file mode 100644
index 00000000000000..5041a39e697988
--- /dev/null
+++ b/flang/test/Fir/struct-passing-return-loongarch64-bystack.fir
@@ -0,0 +1,80 @@
+/// Test LoongArch64 ABI rewrite of struct passed and returned by value (BIND(C), VALUE derived types).
+/// This test covers cases where the struct must be passed or returned on the stack.
+
+// REQUIRES: loongarch-registered-target
+// RUN: tco --target=loongarch64-unknown-linux-gnu %s | FileCheck %s
+
+!ty_int_toobig = !fir.type<int_toobig{i:!fir.array<5xi32>}>
+!ty_int_toobig_align16 = !fir.type<int_toobig_align16{i:i128,j:i8}>
+!ty_fp_toobig = !fir.type<fp_toobig{i:!fir.array<5xf64>}>
+!ty_fp_toobig_align16 = !fir.type<fp_toobig_align16{i:f128,j:f32}>
+
+!ty_i32_f32 = !fir.type<i32_f32{i:i32,j:f32}>
+!ty_nested_toobig = !fir.type<nested_toobig{i:!fir.array<3x!ty_i32_f32>}>
+!ty_badly_aligned = !fir.type<badly_aligned{i:f32,j:f64,k:f32}>
+!ty_logical_toobig = !fir.type<logical_toobig{i:!fir.array<17x!fir.logical<1>>}>
+!ty_cmplx_toobig = !fir.type<cmplx_toobig{i:!fir.array<4xcomplex<f32>>}>
+!ty_char_toobig = !fir.type<char_toobig{i:!fir.array<17x!fir.char<1>>}>
+!ty_cmplx_f128 = !fir.type<cmplx_f128{i:complex<f128>}>
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", llvm.target_triple = "loongarch64-unknown-linux-gnu"} {
+
+// CHECK: declare void @takes_int_toobig(ptr byval(%int_toobig) align 4)
+func.func private @takes_int_toobig(%arg0: !ty_int_toobig) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_int_toobig(ptr sret(%int_toobig) align 4)
+func.func private @return_int_toobig() -> !ty_int_toobig attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_int_toobig_align16(ptr byval(%int_toobig_align16) align 16)
+func.func private @takes_int_toobig_align16(%arg0: !ty_int_toobig_align16) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_int_toobig_align16(ptr sret(%int_toobig_align16) align 16)
+func.func private @return_int_toobig_align16() -> !ty_int_toobig_align16 attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_fp_toobig(ptr byval(%fp_toobig) align 8)
+func.func private @takes_fp_toobig(%arg0: !ty_fp_toobig) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_fp_toobig(ptr sret(%fp_toobig) align 8)
+func.func private @return_fp_toobig() -> !ty_fp_toobig attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_fp_toobig_align16(ptr byval(%fp_toobig_align16) align 16)
+func.func private @takes_fp_toobig_align16(%arg0: !ty_fp_toobig_align16) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_fp_toobig_align16(ptr sret(%fp_toobig_align16) align 16)
+func.func private @return_fp_toobig_align16() -> !ty_fp_toobig_align16 attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_nested_toobig(ptr byval(%nested_toobig) align 4)
+func.func private @takes_nested_toobig(%arg0: !ty_nested_toobig) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_nested_toobig(ptr sret(%nested_toobig) align 4)
+func.func private @return_nested_toobig() -> !ty_nested_toobig attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_badly_aligned(ptr byval(%badly_aligned) align 8)
+func.func private @takes_badly_aligned(%arg0: !ty_badly_aligned) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_badly_aligned(ptr sret(%badly_aligned) align 8)
+func.func private @return_badly_aligned() -> !ty_badly_aligned attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_logical_toobig(ptr byval(%logical_toobig) align 1)
+func.func private @takes_logical_toobig(%arg0: !ty_logical_toobig) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_logical_toobig(ptr sret(%logical_toobig) align 1)
+func.func private @return_logical_toobig() -> !ty_logical_toobig attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_cmplx_toobig(ptr byval(%cmplx_toobig) align 4)
+func.func private @takes_cmplx_toobig(%arg0: !ty_cmplx_toobig) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_cmplx_toobig(ptr sret(%cmplx_toobig) align 4)
+func.func private @return_cmplx_toobig() -> !ty_cmplx_toobig attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_char_toobig(ptr byval(%char_toobig) align 1)
+func.func private @takes_char_toobig(%arg0: !ty_char_toobig) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+// CHECK: declare void @return_char_toobig(ptr sret(%char_toobig) align 1)
+func.func private @return_char_toobig() -> !ty_char_toobig attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare { i32, float } @takes_and_return(float, float, float, float, float, float, float, float, ptr byval(%cmplx_f128) align 16)
+func.func private @takes_and_return(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32,
+                                    %arg5: f32, %arg6: f32, %arg7: f32, %arg8: !ty_cmplx_f128) -> !ty_i32_f32 attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_and_return2(ptr sret(%cmplx_f128) align 16, i32, i32, i32, i32, i32, i32, i32, i32, i64)
+func.func private @takes_and_return2(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32,
+                                     %arg5: i32, %arg6: i32, %arg7: i32, %arg8: !ty_i32_f32) -> !ty_cmplx_f128 attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+
+// CHECK: declare void @takes_multi_byval_arguments(ptr byval(%cmplx_f128) align 16, ptr byval(%cmplx_f128) align 16, ptr byval(%cmplx_f128) align 16, ptr byval(%cmplx_f128) align 16, ptr byval(%cmplx_f128) align 16, ptr byval(%cmplx_f128) align 16, ptr byval(%cmplx_f128) align 16, i32, float, i64)
+func.func private @takes_multi_byval_arguments(%arg0: !ty_cmplx_f128, %arg1: !ty_cmplx_f128, %arg2: !ty_cmplx_f128, %arg3: !ty_cmplx_f128,
+                                               %arg4: !ty_cmplx_f128, %arg5: !ty_cmplx_f128, %arg6: !ty_cmplx_f128,
+                                               %arg7: !ty_i32_f32, %arg8: !ty_i32_f32) attributes {fir.proc_attrs = #fir.proc_attrs<bind_c>}
+}
+
diff --git a/flang/test/Fir/struct-return-loongarch64-byreg.fir b/flang/test/Fir/struct-return-loongarch64-byreg.fir
new file mode 100644
index 00000000000000..b64cdc7ac7099f
--- /dev/null
+++ b/flang/test/Fir/struct-return-loongarch64-byreg.fir
@@ -0,0 +1,200 @@
+/// Test LoongArch64 ABI rewrite of struct returned by value (BIND(C), VALUE derived types).
+/// This test covers cases where the struct can be returned in registers.
+/// Test cases can be roughly divided into two categories:
+///  - struct with a single intrinsic component;
+///  - struct with more than one field;
+/// Since the argument marshalling logic is largely the same within each category,
+/// only the first example in each category checks the entire invocation process,
+/// while the other examples only check the signatures.
+
+// REQUIRES: loongarch-registered-target
+// RUN: fir-opt --split-input-file --target-rewrite="target=loongarch64-unknown-linux-gnu" %s | FileCheck %s
+
+
+/// *********************** Struct with a single intrinsic component *********************** ///
+
+!ty_i16   = !fir.type<ti16{i:i16}>
+!ty_i32   = !fir.type<ti32{i:i32}>
+!ty_i64   = !fir.type<ti64{i:i64}>
+!ty_i128  = !fir.type<ti128{i:i128}>
+!ty_f16   = !fir.type<tf16{i:f16}>
+!ty_f32   = !fir.type<tf32{i:f32}>
+!ty_f64   = !fir.type<tf64{i:f64}>
+!ty_f128  = !fir.type<tf128{i:f128}>
+!ty_bf16  = !fir.type<tbf16{i:bf16}>
+!ty_char1 = !fir.type<tchar1{i:!fir.char<1>}>
+!ty_char2 = !fir.type<tchar2{i:!fir.char<2>}>
+!ty_log1  = !fir.type<tlog1{i:!fir.logical<1>}>
+!ty_log2  = !fir.type<tlog2{i:!fir.logical<2>}>
+!ty_log4  = !fir.type<tlog4{i:!fir.logical<4>}>
+!ty_log8  = !fir.type<tlog8{i:!fir.logical<8>}>
+!ty_log16 = !fir.type<tlog16{i:!fir.logical<16>}>
+!ty_cmplx_f32  = !fir.type<tcmplx_f32{i:complex<f32>}>
+!ty_cmplx_f64  = !fir.type<tcmplx_f64{i:complex<f64>}>
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", llvm.target_triple = "loongarch64-unknown-linux-gnu"} {
+
+// CHECK-LABEL: func.func private @test_func_i16() -> i64
+func.func private @test_func_i16() -> !ty_i16
+// CHECK-LABEL: func.func @test_call_i16(
+// CHECK-SAME:    %[[ARG0:.*]]: !fir.ref<!fir.type<ti16{i:i16}>>) {
+func.func @test_call_i16(%arg0: !fir.ref<!ty_i16>) {
+  // CHECK: %[[OUT:.*]] = fir.call @test_func_i16() : () -> i64
+  // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr
+  // CHECK: %[[ARR:.*]] = fir.alloca i64
+  // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref<i64>
+  // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref<i64>) -> !fir.ref<!fir.type<ti16{i:i16}>>
+  // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref<!fir.type<ti16{i:i16}>>
+  // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr
+  %out = fir.call @test_func_i16() : () -> !ty_i16
+  // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref<!fir.type<ti16{i:i16}>>
+  fir.store %out to %arg0 : !fir.ref<!ty_i16>
+  // CHECK: return
+  return
+}
+
+// CHECK-LABEL: func.func private @test_func_i32() -> i64
+func.func private @test_func_i32() -> !ty_i32
+
+// CHECK-LABEL: func.func private @test_func_i64() -> i64
+func.func private @test_func_i64() -> !ty_i64
+
+// CHECK-LABEL: func.func private @test_func_i128() -> i128
+func.func private @test_func_i128() -> !ty_i128
+
+// CHECK-LABEL: func.func private @test_func_f16() -> i64
+func.func private @test_func_f16() -> !ty_f16
+
+// CHECK-LABEL: func.func private @test_func_f32() -> f32
+func.func private @test_func_f32() -> !ty_f32
+
+// CHECK-LABEL: func.func private @test_func_f64() -> f64
+func.func private @test_func_f64() -> !ty_f64
+
+// CHECK-LABEL: func.func private @test_func_f128() -> i128
+func.func private @test_func_f128() -> !ty_f128
+
+// CHECK-LABEL: func.func private @test_func_bf16() -> i64
+func.func private @test_func_bf16() -> !ty_bf16
+
+// CHECK-LABEL: func.func private @test_func_char1() -> i64
+func.func private @test_func_char1() -> !ty_char1
+
+// CHECK-LABEL: func.func private @test_func_char2() -> i64
+func.func private @test_func_char2() -> !ty_char2
+
+// CHECK-LABEL: func.func private @test_func_log1() -> i64
+func.func private @test_func_log1() -> !ty_log1
+
+// CHECK-LABEL: func.func private @test_func_log2() -> i64
+func.func private @test_func_log2() -> !ty_log2
+
+// CHECK-LABEL: func.func private @test_func_log4() -> i64
+func.func private @test_func_log4() -> !ty_log4
+
+// CHECK-LABEL: func.func private @test_func_log8() -> i64
+func.func private @test_func_log8() -> !ty_log8
+
+// CHECK-LABEL: func.func private @test_func_log16() -> i128
+func.func private @test_func_log16() -> !ty_log16
+
+// CHECK-LABEL: func.func private @test_func_cmplx_f32() -> tuple<f32, f32>
+func.func private @test_func_cmplx_f32() -> !ty_cmplx_f32
+
+// CHECK-LABEL: func.func private @test_func_cmplx_f64() -> tuple<f64, f64>
+func.func private @test_func_cmplx_f64() -> !ty_cmplx_f64
+}
+
+
+/// *************************** Struct with more than one field **************************** ///
+
+// -----
+
+!ty_i32_f32 = !fir.type<ti32_f32{i:i32,j:f32}>
+!ty_i32_f64 = !fir.type<ti32_f64{i:i32,j:f64}>
+!ty_i64_f32 = !fir.type<ti64_f32{i:i64,j:f32}>
+!ty_i64_f64 = !fir.type<ti64_f64{i:i64,j:f64}>
+!ty_f64_i64 = !fir.type<tf64_i64{i:f64,j:i64}>
+!ty_f16_f16 = !fir.type<tf16_f16{i:f16,j:f16}>
+!ty_f32_f32 = !fir.type<tf32_f32{i:f32,j:f32}>
+!ty_f64_f64 = !fir.type<tf64_f64{i:f64,j:f64}>
+!ty_f32_i32_i32 = !fir.type<tf32_i32_i32{i:f32,j:i32,k:i32}>
+!ty_f32_f32_i32 = !fir.type<tf32_f32_i32{i:f32,j:f32,k:i32}>
+!ty_f32_f32_f32 = !fir.type<tf32_f32_f32{i:f32,j:f32,k:f32}>
+
+!ty_i8_a8  = !fir.type<ti8_a8{i:!fir.array<8xi8>}>
+!ty_i8_a16 = !fir.type<ti8_a16{i:!fir.array<16xi8>}>
+!ty_f32_a2 = !fir.type<tf32_a2{i:!fir.array<2xf32>}>
+!ty_f64_a2 = !fir.type<tf64_a2{i:!fir.array<2xf64>}>
+!ty_nested_i32_f32 = !fir.type<t11{i:!ty_i32_f32}>
+!ty_nested_i8_a8_i32 = !fir.type<t12{i:!ty_i8_a8, j:i32}>
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", llvm.target_triple = "loongarch64-unknown-linux-gnu"} {
+
+// CHECK-LABEL: func.func private @test_func_i32_f32() -> tuple<i32, f32>
+func.func private @test_func_i32_f32() -> !ty_i32_f32
+// CHECK-LABEL: func.func @test_call_i32_f32(
+// CHECK-SAME:    %[[ARG0:.*]]: !fir.ref<!fir.type<ti32_f32{i:i32,j:f32}>>) {
+func.func @test_call_i32_f32(%arg0: !fir.ref<!ty_i32_f32>) {
+  // CHECK: %[[OUT:.*]] = fir.call @test_func_i32_f32() : () -> tuple<i32, f32>
+  // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr
+  // CHECK: %[[ARR:.*]] = fir.alloca tuple<i32, f32>
+  // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref<tuple<i32, f32>>
+  // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref<tuple<i32, f32>>) -> !fir.ref<!fir.type<ti32_f32{i:i32,j:f32}>>
+  // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref<!fir.type<ti32_f32{i:i32,j:f32}>>
+  // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr
+  %out = fir.call @test_func_i32_f32() : () -> !ty_i32_f32
+  // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref<!fir.type<ti32_f32{i:i32,j:f32}>>
+  fir.store %out to %arg0 : !fir.ref<!ty_i32_f32>
+  // CHECK: return
+  return
+}
+
+// CHECK-LABEL: func.func private @test_func_i32_f64() -> tuple<i32, f64>
+func.func private @test_func_i32_f64() -> !ty_i32_f64
+
+// CHECK-LABEL: func.func private @test_func_i64_f32() -> tuple<i64, f32>
+func.func private @test_func_i64_f32() -> !ty_i64_f32
+
+// CHECK-LABEL: func.func private @test_func_i64_f64() -> tuple<i64, f64>
+func.func private @test_func_i64_f64() -> !ty_i64_f64
+
+// CHECK-LABEL: func.func private @test_func_f64_i64() -> tuple<f64, i64>
+func.func private @test_func_f64_i64() -> !ty_f64_i64
+
+// CHECK-LABEL: func.func private @test_func_f16_f16() -> i64
+func.func private @test_func_f16_f16() -> !ty_f16_f16
+
+// CHECK-LABEL: func.func private @test_func_f32_f32() -> tuple<f32, f32>
+func.func private @test_func_f32_f32() -> !ty_f32_f32
+
+// CHECK-LABEL: func.func private @test_func_f64_f64() -> tuple<f64, f64>
+func.func private @test_func_f64_f64() -> !ty_f64_f64
+
+// CHECK-LABEL: func.func private @test_func_f32_i32_i32() -> !fir.array<2xi64>
+func.func private @test_func_f32_i32_i32() -> !ty_f32_i32_i32
+
+// CHECK-LABEL: func.func private @test_func_f32_f32_i32() -> !fir.array<2xi64>
+func.func private @test_func_f32_f32_i32() -> !ty_f32_f32_i32
+
+// CHECK-LABEL: func.func private @test_func_f32_f32_f32() -> !fir.array<2xi64>
+func.func private @test_func_f32_f32_f32() -> !ty_f32_f32_f32
+
+// CHECK-LABEL: func.func private @test_func_i8_a8() -> i64
+func.func private @test_func_i8_a8() -> !ty_i8_a8
+
+// CHECK-LABEL: func.func private @test_func_i8_a16() -> !fir.array<2xi64>
+func.func private @test_func_i8_a16() -> !ty_i8_a16
+
+// CHECK-LABEL: func.func private @test_func_f32_a2() -> tuple<f32, f32>
+func.func private @test_func_f32_a2() -> !ty_f32_a2
+
+// CHECK-LABEL: func.func private @test_func_f64_a2() -> tuple<f64, f64>
+func.func private @test_func_f64_a2() -> !ty_f64_a2
+
+// CHECK-LABEL: func.func private @test_func_nested_i32_f32() -> tuple<i32, f32>
+func.func private @test_func_nested_i32_f32() -> !ty_nested_i32_f32
+
+// CHECK-LABEL: func.func private @test_func_nested_i8_a8_i32() -> !fir.array<2xi64>
+func.func private @test_func_nested_i8_a8_i32() -> !ty_nested_i8_a8_i32
+}