[clang] [llvm] [LoongArch] Add support for half-precision floating-point type (PR #141564)
via cfe-commits
cfe-commits at lists.llvm.org
Tue May 27 01:44:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-backend-loongarch
Author: Ami-zhang
<details>
<summary>Changes</summary>
This PR contains 3 commits:
1. Updated the FP16 implementation to pass arguments in FPRs instead of GPRs, as was done originally.
2. Added support for the _Float16 type and fixed 2 related issues (regression tests issue97975.ll and issue97981.ll are added).
3. Added support for the __bf16 type.
---
Patch is 219.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141564.diff
15 Files Affected:
- (modified) clang/docs/LanguageExtensions.rst (+2)
- (modified) clang/lib/Basic/Targets/LoongArch.h (+8)
- (modified) clang/lib/CodeGen/Targets/LoongArch.cpp (+3-4)
- (added) clang/test/CodeGen/LoongArch/__fp16-convert.c (+30)
- (modified) clang/test/CodeGen/LoongArch/abi-lp64d.c (+71)
- (added) clang/test/CodeGen/LoongArch/bfloat-abi.c (+611)
- (added) clang/test/CodeGen/LoongArch/bfloat-mangle.cpp (+19)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+181-3)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+26)
- (added) llvm/test/CodeGen/LoongArch/bf16-promote.ll (+172)
- (added) llvm/test/CodeGen/LoongArch/bf16.ll (+1048)
- (added) llvm/test/CodeGen/LoongArch/calling-conv-half.ll (+1626)
- (modified) llvm/test/CodeGen/LoongArch/fp16-promote.ll (+131-71)
- (added) llvm/test/CodeGen/LoongArch/issue97975.ll (+438)
- (added) llvm/test/CodeGen/LoongArch/issue97981.ll (+127)
``````````diff
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index a40dd4d1a1673..4fa91b95c45e0 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1001,6 +1001,7 @@ to ``float``; see below for more information on this emulation.
* X86 (if SSE2 is available; natively if AVX512-FP16 is also available)
* RISC-V (natively if Zfh or Zhinx is available)
* SystemZ (emulated)
+ * LoongArch
* ``__bf16`` is supported on the following targets (currently never natively):
@@ -1008,6 +1009,7 @@ to ``float``; see below for more information on this emulation.
* 64-bit ARM (AArch64)
* RISC-V
* X86 (when SSE2 is available)
+ * LoongArch
(For X86, SSE2 is available on 64-bit and all recent 32-bit processors.)
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index 4c7b53abfef9b..7e9affc98ac0f 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -49,10 +49,14 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
HasFeatureLD_SEQ_SA = false;
HasFeatureDiv32 = false;
HasFeatureSCQ = false;
+ BFloat16Width = 16;
+ BFloat16Align = 16;
+ BFloat16Format = &llvm::APFloat::BFloat();
LongDoubleWidth = 128;
LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::IEEEquad();
MCountName = "_mcount";
+ HasFloat16 = true;
SuitableAlign = 128;
WCharType = SignedInt;
WIntType = UnsignedInt;
@@ -98,6 +102,10 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
bool hasBitIntType() const override { return true; }
+ bool hasBFloat16Type() const override { return true; }
+
+ bool useFP16ConversionIntrinsics() const override { return false; }
+
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;
diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp
index 0f689371a60db..7640f3779816a 100644
--- a/clang/lib/CodeGen/Targets/LoongArch.cpp
+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp
@@ -110,10 +110,9 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
uint64_t Size = getContext().getTypeSize(Ty);
if (IsInt && Size > GRLen)
return false;
- // Can't be eligible if larger than the FP registers. Half precision isn't
- // currently supported on LoongArch and the ABI hasn't been confirmed, so
- // default to the integer ABI in that case.
- if (IsFloat && (Size > FRLen || Size < 32))
+ // Can't be eligible if larger than the FP registers. Handling of half
+ // precision values has been specified in the ABI, so don't block those.
+ if (IsFloat && Size > FRLen)
return false;
// Can't be eligible if an integer type was already found (int+int pairs
// are not eligible).
diff --git a/clang/test/CodeGen/LoongArch/__fp16-convert.c b/clang/test/CodeGen/LoongArch/__fp16-convert.c
new file mode 100644
index 0000000000000..84ef5de960b47
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/__fp16-convert.c
@@ -0,0 +1,30 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple loongarch64 -emit-llvm %s -o - \
+// RUN: | FileCheck %s
+
+__fp16 y;
+short z;
+// CHECK-LABEL: define dso_local void @bar1(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @y, align 2
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[CONV]] to i16
+// CHECK-NEXT: store i16 [[CONV1]], ptr @z, align 2
+// CHECK-NEXT: ret void
+//
+void bar1(){
+ z = y;
+}
+// CHECK-LABEL: define dso_local void @bar2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @z, align 2
+// CHECK-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[CONV]] to half
+// CHECK-NEXT: store half [[CONV1]], ptr @y, align 2
+// CHECK-NEXT: ret void
+//
+void bar2(){
+ y = z;
+}
diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c
index fc7f1eada586b..9f64cfd662e5f 100644
--- a/clang/test/CodeGen/LoongArch/abi-lp64d.c
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c
@@ -48,6 +48,9 @@ unsigned long check_ulong() { return 0; }
// CHECK-LABEL: define{{.*}} i64 @check_ulonglong()
unsigned long long check_ulonglong() { return 0; }
+// CHECK-LABEL: define{{.*}} half @check_float16()
+_Float16 check_float16() { return 0; }
+
// CHECK-LABEL: define{{.*}} float @check_float()
float check_float() { return 0; }
@@ -127,6 +130,14 @@ struct i16x4_s f_i16x4_s(struct i16x4_s x) {
/// available, the value is passed in a GAR; if no GAR is available, the value
/// is passed on the stack.
+struct f16x1_s {
+ __fp16 a;
+};
+
+struct float16x1_s {
+ _Float16 a;
+};
+
struct f32x1_s {
float a;
};
@@ -135,6 +146,16 @@ struct f64x1_s {
double a;
};
+// CHECK-LABEL: define{{.*}} half @f_f16x1_s(half %0)
+struct f16x1_s f_f16x1_s(struct f16x1_s x) {
+ return x;
+}
+
+// CHECK-LABEL: define{{.*}} half @f_float16x1_s(half %0)
+struct float16x1_s f_float16x1_s(struct float16x1_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0)
struct f32x1_s f_f32x1_s(struct f32x1_s x) {
return x;
@@ -151,10 +172,20 @@ struct f64x1_s f_f64x1_s(struct f64x1_s x) {
/// number of available FAR is less than 2, it’s passed in a GAR, and passed on
/// the stack if no GAR is available.
+struct f16x2_s {
+ __fp16 a;
+ _Float16 b;
+};
+
struct f32x2_s {
float a, b;
};
+// CHECK-LABEL: define{{.*}} { half, half } @f_f16x2_s(half %0, half %1)
+struct f16x2_s f_f16x2_s(struct f16x2_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1)
struct f32x2_s f_f32x2_s(struct f32x2_s x) {
return x;
@@ -165,11 +196,21 @@ struct f32x2_s f_f32x2_s(struct f32x2_s x) {
/// i. Multiple fixed-point members. If there are available GAR, the structure
/// is passed in a GAR, and passed on the stack if no GAR is available.
+struct f16x1_i16x2_s {
+ _Float16 a;
+ int16_t b, c;
+};
+
struct f32x1_i16x2_s {
float a;
int16_t b, c;
};
+// CHECK-LABEL: define{{.*}} i64 @f_f16x1_i16x2_s(i64 %x.coerce)
+struct f16x1_i16x2_s f_f16x1_i16x2_s(struct f16x1_i16x2_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce)
struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
return x;
@@ -181,11 +222,21 @@ struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s
/// passed on the stack.
+struct f16x1_i32x1_s {
+ _Float16 a;
+ int32_t b;
+};
+
struct f32x1_i32x1_s {
float a;
int32_t b;
};
+// CHECK-LABEL: define{{.*}} { half, i32 } @f_f16x1_i32x1_s(half %0, i32 %1)
+struct f16x1_i32x1_s f_f16x1_i32x1_s(struct f16x1_i32x1_s x) {
+ return x;
+}
+
// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1)
struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) {
return x;
@@ -253,6 +304,16 @@ struct f32x4_s f_f32x4_s(struct f32x4_s x) {
return x;
}
+struct f16x5_s {
+ _Float16 a, b, c, d;
+ __fp16 e;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x5_s([2 x i64] %x.coerce)
+struct f16x5_s f_f16x5_s(struct f16x5_s x) {
+ return x;
+}
+
/// ii. The structure with two double members is passed in a pair of available
/// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with
/// one double member and one float member is same.
@@ -312,6 +373,16 @@ struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) {
return x;
}
+struct f16x4_i32x2_s {
+ _Float16 a, b, c, d;
+ int32_t e, f;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x4_i32x2_s([2 x i64] %x.coerce)
+struct f16x4_i32x2_s f_f16x4_i32x2_s(struct f16x4_i32x2_s x) {
+ return x;
+}
+
/// 3. WOA > 2 × GRLEN
/// a. It’s passed by reference and are replaced in the argument list with the
/// address. If there is an available GAR, the reference is passed in the GAR,
diff --git a/clang/test/CodeGen/LoongArch/bfloat-abi.c b/clang/test/CodeGen/LoongArch/bfloat-abi.c
new file mode 100644
index 0000000000000..9f0e25c17cc74
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/bfloat-abi.c
@@ -0,0 +1,611 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple loongarch64 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-LA64
+// RUN: %clang_cc1 -triple loongarch32 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-LA32
+
+struct bfloat1 {
+ __bf16 a;
+};
+
+// CHECK-LA64-LABEL: define dso_local bfloat @h1
+// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-LA64-NEXT: entry:
+// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2
+// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
+// CHECK-LA64-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { bfloat }, ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[TMP1]], align 2
+// CHECK-LA64-NEXT: ret bfloat [[TMP2]]
+//
+// CHECK-LA32-LABEL: define dso_local bfloat @h1
+// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-LA32-NEXT: entry:
+// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2
+// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
+// CHECK-LA32-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { bfloat }, ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[TMP1]], align 2
+// CHECK-LA32-NEXT: ret bfloat [[TMP2]]
+//
+struct bfloat1 h1(__bf16 a) {
+ struct bfloat1 x;
+ x.a = a;
+ return x;
+}
+
+struct bfloat2 {
+ __bf16 a;
+ __bf16 b;
+};
+
+// CHECK-LA64-LABEL: define dso_local { bfloat, bfloat } @h2
+// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-LA64-NEXT: entry:
+// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2
+// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
+// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
+// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
+// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 1
+// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
+// CHECK-LA64-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[TMP2]], align 2
+// CHECK-LA64-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 1
+// CHECK-LA64-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[TMP4]], align 2
+// CHECK-LA64-NEXT: [[TMP6:%.*]] = insertvalue { bfloat, bfloat } poison, bfloat [[TMP3]], 0
+// CHECK-LA64-NEXT: [[TMP7:%.*]] = insertvalue { bfloat, bfloat } [[TMP6]], bfloat [[TMP5]], 1
+// CHECK-LA64-NEXT: ret { bfloat, bfloat } [[TMP7]]
+//
+// CHECK-LA32-LABEL: define dso_local { bfloat, bfloat } @h2
+// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-LA32-NEXT: entry:
+// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2
+// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-LA32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
+// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
+// CHECK-LA32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
+// CHECK-LA32-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 1
+// CHECK-LA32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
+// CHECK-LA32-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA32-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[TMP2]], align 2
+// CHECK-LA32-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 1
+// CHECK-LA32-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[TMP4]], align 2
+// CHECK-LA32-NEXT: [[TMP6:%.*]] = insertvalue { bfloat, bfloat } poison, bfloat [[TMP3]], 0
+// CHECK-LA32-NEXT: [[TMP7:%.*]] = insertvalue { bfloat, bfloat } [[TMP6]], bfloat [[TMP5]], 1
+// CHECK-LA32-NEXT: ret { bfloat, bfloat } [[TMP7]]
+//
+struct bfloat2 h2(__bf16 a, __bf16 b) {
+ struct bfloat2 x;
+ x.a = a;
+ x.b = b;
+ return x;
+}
+
+struct bfloat3 {
+ __bf16 a;
+ __bf16 b;
+ __bf16 c;
+};
+
+// CHECK-LA64-LABEL: define dso_local i64 @h3
+// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-LA64-NEXT: entry:
+// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2
+// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: [[RETVAL_COERCE:%.*]] = alloca i64, align 8
+// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
+// CHECK-LA64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
+// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
+// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
+// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 1
+// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
+// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
+// CHECK-LA64-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2
+// CHECK-LA64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
+// CHECK-LA64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 6, i1 false)
+// CHECK-LA64-NEXT: [[TMP3:%.*]] = load i64, ptr [[RETVAL_COERCE]], align 8
+// CHECK-LA64-NEXT: ret i64 [[TMP3]]
+//
+// CHECK-LA32-LABEL: define dso_local [2 x i32] @h3
+// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-LA32-NEXT: entry:
+// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2
+// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i32], align 4
+// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-LA32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
+// CHECK-LA32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
+// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
+// CHECK-LA32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
+// CHECK-LA32-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 1
+// CHECK-LA32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
+// CHECK-LA32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
+// CHECK-LA32-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2
+// CHECK-LA32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
+// CHECK-LA32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i32 6, i1 false)
+// CHECK-LA32-NEXT: [[TMP3:%.*]] = load [2 x i32], ptr [[RETVAL_COERCE]], align 4
+// CHECK-LA32-NEXT: ret [2 x i32] [[TMP3]]
+//
+struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) {
+ struct bfloat3 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ return x;
+}
+
+struct bfloat4 {
+ __bf16 a;
+ __bf16 b;
+ __bf16 c;
+ __bf16 d;
+};
+
+// CHECK-LA64-LABEL: define dso_local i64 @h4
+// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-LA64-NEXT: entry:
+// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2
+// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
+// CHECK-LA64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
+// CHECK-LA64-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
+// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
+// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
+// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 1
+// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
+// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
+// CHECK-LA64-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2
+// CHECK-LA64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
+// CHECK-LA64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
+// CHECK-LA64-NEXT: [[D4:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 3
+// CHECK-LA64-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2
+// CHECK-LA64-NEXT: [[TMP4:%.*]] = load i64, ptr [[RETVAL]], align 2
+// CHECK-LA64-NEXT: ret i64 [[TMP4]]
+//
+// CHECK-LA32-LABEL: define dso_local [2 x i32] @h4
+// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-LA32-NEXT: entry:
+// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2
+// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LA32-NEXT: [[D_ADDR:%.*]...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/141564
More information about the cfe-commits mailing list