[clang] [CIR] Implement handling for VectorType with size 3 (PR #161232)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 29 10:00:07 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clangir
Author: Amr Hesham (AmrDeveloper)
<details>
<summary>Changes</summary>
Implement handling for VectorType with size 3 by promoting it to a vector of size 4 for loads and stores, unless PreserveVec3Type is set.
---
Full diff: https://github.com/llvm/llvm-project/pull/161232.diff
5 Files Affected:
- (modified) clang/lib/CIR/CodeGen/ABIInfo.h (+9)
- (modified) clang/lib/CIR/CodeGen/CIRGenExpr.cpp (+51-18)
- (modified) clang/lib/CIR/CodeGen/TargetInfo.cpp (+9)
- (modified) clang/lib/CIR/Dialect/IR/CIRDialect.cpp (+17-9)
- (added) clang/test/CIR/CodeGen/vector-with-size-3.cpp (+38)
``````````diff
diff --git a/clang/lib/CIR/CodeGen/ABIInfo.h b/clang/lib/CIR/CodeGen/ABIInfo.h
index 4d03db38cabb9..ef9ade0a4a3ac 100644
--- a/clang/lib/CIR/CodeGen/ABIInfo.h
+++ b/clang/lib/CIR/CodeGen/ABIInfo.h
@@ -9,6 +9,9 @@
#ifndef LLVM_CLANG_LIB_CIR_ABIINFO_H
#define LLVM_CLANG_LIB_CIR_ABIINFO_H
+#include "clang/Basic/LangOptions.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+
namespace clang::CIRGen {
class CIRGenFunctionInfo;
@@ -23,6 +26,12 @@ class ABIInfo {
ABIInfo(CIRGenTypes &cgt) : cgt(cgt) {}
virtual ~ABIInfo();
+
+ /// Returns the vector type used for loads and stores of \p ty in memory. The
+ /// default promotes a 3-element vector to 4 elements for better performance,
+ /// unless \p opt has PreserveVec3Type set; targets may override this.
+ virtual cir::VectorType
+ getOptimalVectorMemoryType(cir::VectorType ty, const LangOptions &opt) const;
};
} // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index fa68ad931ba74..e40fa87adeec9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -314,21 +314,36 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr,
bool isInit, bool isNontemporal) {
assert(!cir::MissingFeatures::opLoadStoreThreadLocal());
+ mlir::Type srcTy = addr.getElementType();
if (const auto *clangVecTy = ty->getAs<clang::VectorType>()) {
- // Boolean vectors use `iN` as storage type.
- if (clangVecTy->isExtVectorBoolType())
- cgm.errorNYI(addr.getPointer().getLoc(),
- "emitStoreOfScalar ExtVectorBoolType");
+ if (auto vecTy = dyn_cast<cir::VectorType>(srcTy)) {
+ cir::VectorType newVecTy =
+ cgm.getTargetCIRGenInfo().getABIInfo().getOptimalVectorMemoryType(
+ vecTy, getLangOpts());
+
+ if (!clangVecTy->isPackedVectorBoolType(getContext()) &&
+ vecTy != newVecTy) {
+ // Widen to the memory type: keep the original lanes; -1 marks padding.
+ const unsigned oldNumElements = vecTy.getSize();
+ const unsigned newNumElements = newVecTy.getSize();
+ SmallVector<mlir::Attribute, 8> indices;
+ indices.reserve(newNumElements);
+ for (unsigned i = 0; i < newNumElements; ++i) {
+ const int64_t idx = i < oldNumElements ? static_cast<int64_t>(i) : -1;
+ indices.push_back(cir::IntAttr::get(builder.getSInt64Ty(), idx));
+ }
- // Handle vectors of size 3 like size 4 for better performance.
- const mlir::Type elementType = addr.getElementType();
- const auto vecTy = cast<cir::VectorType>(elementType);
+ cir::ConstantOp poison = builder.getConstant(
+ value.getLoc(), cir::PoisonAttr::get(value.getType()));
+ value =
+ cir::VecShuffleOp::create(builder, value.getLoc(), newVecTy, value,
+ poison, builder.getArrayAttr(indices));
+ srcTy = newVecTy;
+ }
- // TODO(CIR): Use `ABIInfo::getOptimalVectorMemoryType` once it upstreamed
- assert(!cir::MissingFeatures::cirgenABIInfo());
- if (vecTy.getSize() == 3 && !getLangOpts().PreserveVec3Type)
- cgm.errorNYI(addr.getPointer().getLoc(),
- "emitStoreOfScalar Vec3 & PreserveVec3Type disabled");
+ if (addr.getElementType() != srcTy)
+ addr = addr.withElementType(builder, srcTy);
+ }
}
value = emitToMemory(value, ty);
@@ -565,13 +580,31 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
return nullptr;
}
- const auto vecTy = cast<cir::VectorType>(eltTy);
+ // Load using the ABI's optimal memory type (e.g. vec3 widened to vec4), then
+ // shuffle the original lanes back out so the result keeps the source type.
+ auto vecTy = cast<cir::VectorType>(eltTy);
+ cir::VectorType newVecTy =
+ cgm.getTargetCIRGenInfo().getABIInfo().getOptimalVectorMemoryType(
+ vecTy, getLangOpts());
+
+ if (vecTy != newVecTy) {
+ Address wideAddr = addr.withElementType(builder, newVecTy);
+ mlir::Value value =
+ builder.createLoad(cgm.getLoc(loc), wideAddr, isVolatile);
+
+ const unsigned oldNumElements = vecTy.getSize();
+ SmallVector<mlir::Attribute, 8> indices;
+ indices.reserve(oldNumElements);
+ for (unsigned i = 0; i < oldNumElements; ++i)
+ indices.push_back(cir::IntAttr::get(builder.getSInt64Ty(), i));
+
+ cir::ConstantOp poison = builder.getConstant(
+ value.getLoc(), cir::PoisonAttr::get(value.getType()));
+ value = cir::VecShuffleOp::create(builder, cgm.getLoc(loc), vecTy, value,
+ poison, builder.getArrayAttr(indices));
- // Handle vectors of size 3 like size 4 for better performance.
- assert(!cir::MissingFeatures::cirgenABIInfo());
- if (vecTy.getSize() == 3 && !getLangOpts().PreserveVec3Type)
- cgm.errorNYI(addr.getPointer().getLoc(),
- "emitLoadOfScalar Vec3 & PreserveVec3Type disabled");
+ return value;
+ }
}
assert(!cir::MissingFeatures::opLoadStoreTbaa());
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index 62a8c59abe604..252d9e9ad48bc 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -60,6 +60,15 @@ clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) {
ABIInfo::~ABIInfo() noexcept = default;
+cir::VectorType
+ABIInfo::getOptimalVectorMemoryType(cir::VectorType ty,
+ const LangOptions &opt) const {
+ // Only 3-element vectors are widened, and only when vec3 need not be kept.
+ if (ty.getSize() != 3 || opt.PreserveVec3Type)
+ return ty;
+ return cir::VectorType::get(ty.getElementType(), 4);
+}
+
bool TargetCIRGenInfo::isNoProtoCallVariadic(
const FunctionNoProtoType *fnType) const {
// The following conventions are known to require this to be false:
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 58ef500446aa7..f4aab4fc6c5be 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -321,6 +321,12 @@ static LogicalResult checkConstantTypes(mlir::Operation *op, mlir::Type opType,
"zero expects struct, array, vector, or complex type");
}
+ if (mlir::isa<cir::PoisonAttr>(attrType)) {
+ if (mlir::isa<cir::VoidType>(opType))
+ return op->emitOpError("poison expects non-void type");
+ return success();
+ }
+
if (mlir::isa<cir::BoolAttr>(attrType)) {
if (!mlir::isa<cir::BoolType>(opType))
return op->emitOpError("result type (")
@@ -2123,23 +2129,25 @@ OpFoldResult cir::VecShuffleOp::fold(FoldAdaptor adaptor) {
if (!vec1Attr || !vec2Attr)
return {};
- mlir::Type vec1ElemTy =
- mlir::cast<cir::VectorType>(vec1Attr.getType()).getElementType();
+ mlir::ArrayAttr indicesElts = adaptor.getIndices();
+ auto indicesEltsRange = indicesElts.getAsRange<cir::IntAttr>();
+
+ // In MLIR, DenseElementsAttr can't contain an undef attr, so we can't fold
+ // the shuffle op to a ConstVector if any index has the value -1.
+ if (std::find_if(indicesEltsRange.begin(), indicesEltsRange.end(),
+ [](cir::IntAttr idx) { return idx.getSInt() == -1; }) !=
+ indicesEltsRange.end()) {
+ return {};
+ }
mlir::ArrayAttr vec1Elts = vec1Attr.getElts();
mlir::ArrayAttr vec2Elts = vec2Attr.getElts();
- mlir::ArrayAttr indicesElts = adaptor.getIndices();
SmallVector<mlir::Attribute, 16> elements;
elements.reserve(indicesElts.size());
uint64_t vec1Size = vec1Elts.size();
- for (const auto &idxAttr : indicesElts.getAsRange<cir::IntAttr>()) {
- if (idxAttr.getSInt() == -1) {
- elements.push_back(cir::UndefAttr::get(vec1ElemTy));
- continue;
- }
-
+ for (const auto &idxAttr : indicesEltsRange) {
uint64_t idxValue = idxAttr.getUInt();
elements.push_back(idxValue < vec1Size ? vec1Elts[idxValue]
: vec2Elts[idxValue - vec1Size]);
diff --git a/clang/test/CIR/CodeGen/vector-with-size-3.cpp b/clang/test/CIR/CodeGen/vector-with-size-3.cpp
new file mode 100644
index 0000000000000..2108d3cefb8c5
--- /dev/null
+++ b/clang/test/CIR/CodeGen/vector-with-size-3.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+typedef int vi3 __attribute__((ext_vector_type(3)));
+
+void store_load() {
+ vi3 a;
+ vi3 b = a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr<!cir.vector<3 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr<!cir.vector<3 x !s32i>>, ["b", init]
+// CIR: %[[A_V4:.*]] = cir.cast(bitcast, %[[A_ADDR]] : !cir.ptr<!cir.vector<3 x !s32i>>), !cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: %[[TMP_A_V4:.*]] = cir.load{{.*}} %[[A_V4]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
+// CIR: %[[SHUFFLE_V4:.*]] = cir.vec.shuffle(%[[TMP_A_V4]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i] : !cir.vector<3 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<3 x !s32i>
+// CIR: %[[SHUFFLE_V3:.*]] = cir.vec.shuffle(%[[SHUFFLE_V4]], %[[POISON]] : !cir.vector<3 x !s32i>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<-1> : !s64i] : !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B_V4:.*]] = cir.cast(bitcast, %[[B_ADDR]] : !cir.ptr<!cir.vector<3 x !s32i>>), !cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: cir.store{{.*}} %[[SHUFFLE_V3]], %[[TMP_B_V4]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <3 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <3 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[SHUFFLE_V4:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// LLVM: %[[SHUFFLE_V3:.*]] = shufflevector <3 x i32> %[[SHUFFLE_V4]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// LLVM: store <4 x i32> %[[SHUFFLE_V3]], ptr %[[B_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <3 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <3 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[SHUFFLE_V4:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// OGCG: %[[SHUFFLE_V3:.*]] = shufflevector <3 x i32> %[[SHUFFLE_V4]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// OGCG: store <4 x i32> %[[SHUFFLE_V3]], ptr %[[B_ADDR]], align 16
``````````
</details>
https://github.com/llvm/llvm-project/pull/161232
More information about the cfe-commits
mailing list