[clang] [CIR] Implement Type promotion for VectorType (PR #158715)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 15 11:54:32 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Amr Hesham (AmrDeveloper)
<details>
<summary>Changes</summary>
This change adds support for type promotion of VectorType.
Issue: https://github.com/llvm/llvm-project/issues/136487
---
Full diff: https://github.com/llvm/llvm-project/pull/158715.diff
3 Files Affected:
- (modified) clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp (+6-4)
- (modified) clang/test/CIR/CodeGen/vector-ext.cpp (+40)
- (modified) clang/test/CIR/CodeGen/vector.cpp (+40)
``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 2261e24fe44c2..481cff010da16 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -864,19 +864,21 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
// TODO(cir): Candidate to be in a common AST helper between CIR and LLVM
// codegen.
QualType getPromotionType(QualType ty) {
+ const auto &ctx = cgf.getContext();
if (ty->getAs<ComplexType>()) {
assert(!cir::MissingFeatures::complexType());
cgf.cgm.errorNYI("promotion to complex type");
return QualType();
}
+
if (ty.UseExcessPrecision(cgf.getContext())) {
- if (ty->getAs<VectorType>()) {
- assert(!cir::MissingFeatures::vectorType());
- cgf.cgm.errorNYI("promotion to vector type");
- return QualType();
+ if (auto *vt = ty->getAs<VectorType>()) {
+ unsigned numElements = vt->getNumElements();
+ return ctx.getVectorType(ctx.FloatTy, numElements, vt->getVectorKind());
}
return cgf.getContext().FloatTy;
}
+
return QualType();
}
diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp
index 2ee42187a6e94..de0771ca177c1 100644
--- a/clang/test/CIR/CodeGen/vector-ext.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext.cpp
@@ -13,6 +13,7 @@ typedef int vi3 __attribute__((ext_vector_type(3)));
typedef int vi2 __attribute__((ext_vector_type(2)));
typedef float vf4 __attribute__((ext_vector_type(4)));
typedef double vd2 __attribute__((ext_vector_type(2)));
+typedef _Float16 vh4 __attribute__((ext_vector_type(4)));
vi4 vec_a;
// CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<4 x !s32i>
@@ -1176,3 +1177,42 @@ void foo21() {
// OGCG: %[[SIZE:.*]] = alloca i64, align 8
// OGCG: store i64 4, ptr %[[SIZE]], align 8
+
+void foo24() {
+ vh4 a;
+ vh4 b;
+ vh4 c = a + b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
+// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
+// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float>
+// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16>
+// CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[C_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
+// LLVM: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
+// LLVM: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
+// LLVM: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
+// LLVM: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
+// LLVM: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
+// LLVM: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[C_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
+// OGCG: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
+// OGCG: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
+// OGCG: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
+// OGCG: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
+// OGCG: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
+// OGCG: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp
index 18fa90bd2cb3f..6c55ab1edc7d2 100644
--- a/clang/test/CIR/CodeGen/vector.cpp
+++ b/clang/test/CIR/CodeGen/vector.cpp
@@ -12,6 +12,7 @@ typedef unsigned int uvi4 __attribute__((vector_size(16)));
typedef float vf4 __attribute__((vector_size(16)));
typedef double vd2 __attribute__((vector_size(16)));
typedef long long vll2 __attribute__((vector_size(16)));
+typedef _Float16 vh4 __attribute__((vector_size(8)));
vi4 vec_a;
// CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<4 x !s32i>
@@ -1218,3 +1219,42 @@ void foo24() {
// OGCG: %[[SIZE:.*]] = alloca i64, align 8
// OGCG: store i64 4, ptr %[[SIZE]], align 8
+
+void foo27() {
+ vh4 a;
+ vh4 b;
+ vh4 c = a + b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
+// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
+// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float>
+// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16>
+// CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[C_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
+// LLVM: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
+// LLVM: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
+// LLVM: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
+// LLVM: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
+// LLVM: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
+// LLVM: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[C_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
+// OGCG: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
+// OGCG: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
+// OGCG: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
+// OGCG: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
+// OGCG: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
+// OGCG: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
``````````
</details>
https://github.com/llvm/llvm-project/pull/158715
More information about the cfe-commits mailing list