[clang] [clang] fix half && bfloat16 convert node expr codegen (PR #89051)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Mon Apr 22 01:55:13 PDT 2024
================
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16 -S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[CONV]] to half
+// CHECK-NEXT: ret half [[CONV1]]
+//
+_Float16 test_convert_from_bf16_to_fp16(__bf16 a) {
+ return (_Float16)a;
+}
+
+// CHECK-LABEL: define dso_local bfloat @test_convert_from_fp16_to_bf16(
+// CHECK-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[CONV]] to bfloat
+// CHECK-NEXT: ret bfloat [[CONV1]]
+//
+__bf16 test_convert_from_fp16_to_bf16(_Float16 a) {
+ return (__bf16)a;
+}
+
+typedef _Float16 half2 __attribute__((ext_vector_type(2)));
+typedef _Float16 half4 __attribute__((ext_vector_type(4)));
+
+typedef __bf16 bfloat2 __attribute__((ext_vector_type(2)));
+typedef __bf16 bfloat4 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: define dso_local i32 @test_cast_from_fp162_to_bf162(
+// CHECK-SAME: i32 noundef [[IN_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x bfloat>, align 4
+// CHECK-NEXT: [[IN:%.*]] = alloca <2 x half>, align 4
+// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <2 x half>, align 4
+// CHECK-NEXT: store i32 [[IN_COERCE]], ptr [[IN]], align 4
+// CHECK-NEXT: [[IN1:%.*]] = load <2 x half>, ptr [[IN]], align 4
+// CHECK-NEXT: store <2 x half> [[IN1]], ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[TMP0]] to <2 x bfloat>
+// CHECK-NEXT: store <2 x bfloat> [[TMP1]], ptr [[RETVAL]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP2]]
+//
+bfloat2 test_cast_from_fp162_to_bf162(half2 in) {
+ return (bfloat2)in;
+}
+
+
+// CHECK-LABEL: define dso_local double @test_cast_from_fp164_to_bf164(
+// CHECK-SAME: double noundef [[IN_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x bfloat>, align 8
+// CHECK-NEXT: [[IN:%.*]] = alloca <4 x half>, align 8
+// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <4 x half>, align 8
+// CHECK-NEXT: store double [[IN_COERCE]], ptr [[IN]], align 8
+// CHECK-NEXT: [[IN1:%.*]] = load <4 x half>, ptr [[IN]], align 8
+// CHECK-NEXT: store <4 x half> [[IN1]], ptr [[IN_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[IN_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <4 x bfloat>
+// CHECK-NEXT: store <4 x bfloat> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8
+// CHECK-NEXT: ret double [[TMP2]]
+//
+bfloat4 test_cast_from_fp164_to_bf164(half4 in) {
+ return (bfloat4)in;
+}
+
+// CHECK-LABEL: define dso_local i32 @test_cast_from_bf162_to_fp162(
+// CHECK-SAME: i32 noundef [[IN_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4
+// CHECK-NEXT: [[IN:%.*]] = alloca <2 x bfloat>, align 4
+// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <2 x bfloat>, align 4
+// CHECK-NEXT: store i32 [[IN_COERCE]], ptr [[IN]], align 4
+// CHECK-NEXT: [[IN1:%.*]] = load <2 x bfloat>, ptr [[IN]], align 4
+// CHECK-NEXT: store <2 x bfloat> [[IN1]], ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x bfloat> [[TMP0]] to <2 x half>
----------------
arsenm wrote:
This bitcast also doesn't look right. I'm shocked that the vector cast behavior seems to treat FP-to-int vector casts as a bitcast, which is radically different from the scalar case (which OpenCL doesn't even allow).
The comment says it allows bitcasts between FP and integer vectors of the same size, but that is not what this cast is: both sides here are floating-point vectors (`<2 x bfloat>` to `<2 x half>`).
https://github.com/llvm/llvm-project/pull/89051
More information about the cfe-commits mailing list