[clang] [CIR] Implement __builtin_object_size and __builtin_dynamic_object_size (PR #166191)
Morris Hafner via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 3 09:10:36 PST 2025
https://github.com/mmha updated https://github.com/llvm/llvm-project/pull/166191
>From 30c9a5cf5f2ca7192f16eaae0eb37e13425b39b4 Mon Sep 17 00:00:00 2001
From: Morris Hafner <mhafner at nvidia.com>
Date: Tue, 4 Nov 2025 01:00:07 +0800
Subject: [PATCH 1/3] [CIR] Implement __builtin_object_size and
__builtin_dynamic_object_size
* Add cir.objsize operation to CIR dialect
* Add lowering for cir.objsize operation to LLVM dialect
* Add codegen for __builtin_object_size and __builtin_dynamic_object_size
Note that this does not support the pass_object_size attribute yet.
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 42 +++++++++++++
clang/include/clang/CIR/MissingFeatures.h | 1 +
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 60 +++++++++++++++++++
clang/lib/CIR/CodeGen/CIRGenFunction.h | 8 +++
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 23 +++++++
.../test/CIR/CodeGen/builtin-object-size.cpp | 38 ++++++++++++
6 files changed, 172 insertions(+)
create mode 100644 clang/test/CIR/CodeGen/builtin-object-size.cpp
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 2b361ed0982c6..4cf0b817e8056 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4089,6 +4089,48 @@ def CIR_PrefetchOp : CIR_Op<"prefetch"> {
}];
}
+//===----------------------------------------------------------------------===//
+// ObjSizeOp
+//===----------------------------------------------------------------------===//
+
+def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> {
+ let summary = "Implements the llvm.objsize builtin";
+ let description = [{
+ The `cir.objsize` operation models the behavior of the `llvm.objectsize`
+ intrinsic in Clang. It returns the number of accessible bytes past ptr.
+
+ When the `min` attribute is present, the operation returns the minimum
+ guaranteed accessible size. When absent (max mode), it returns the maximum
+ possible object size. Additionally, when the object size is unknown, min
+ mode returns 0 while max mode returns -1. Corresponds to `llvm.objectsize`'s
+ `min` argument.
+
+ The `dynamic` attribute determines if the value should be evaluated at
+ runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument.
+
+ Example:
+
+ ```mlir
+ %size = cir.objsize min %ptr : !cir.ptr<i32> -> i64
+ %dsize = cir.objsize max dynamic %ptr : !cir.ptr<i32> -> i64
+ ```
+ }];
+
+ let arguments = (ins
+ CIR_PointerType:$ptr,
+ UnitAttr:$min,
+ UnitAttr:$dynamic
+ );
+
+ let results = (outs CIR_AnyFundamentalIntType:$result);
+
+ let assemblyFormat = [{
+ (`min` $min^) : (`max`)?
+ (`dynamic` $dynamic^)?
+ $ptr `:` qualified(type($ptr)) `->` qualified(type($result)) attr-dict
+ }];
+}
+
//===----------------------------------------------------------------------===//
// PtrDiffOp
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 48ef8be9fb782..ae96f6b932571 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -213,6 +213,7 @@ struct MissingFeatures {
static bool builtinCallMathErrno() { return false; }
static bool builtinCheckKind() { return false; }
static bool cgCapturedStmtInfo() { return false; }
+ static bool countedBySize() { return false; }
static bool cgFPOptionsRAII() { return false; }
static bool checkBitfieldClipping() { return false; }
static bool cirgenABIInfo() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index e35100ffe4b6b..6617895fa8832 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -459,6 +459,19 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
return emitCall(e->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), e,
returnValue);
}
+ case Builtin::BI__builtin_dynamic_object_size:
+ case Builtin::BI__builtin_object_size: {
+ unsigned type =
+ e->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
+ auto resType = mlir::cast<cir::IntType>(convertType(e->getType()));
+
+ // We pass this builtin onto the optimizer so that it can figure out the
+ // object size in more complex cases.
+ bool isDynamic = builtinID == Builtin::BI__builtin_dynamic_object_size;
+ return RValue::get(emitBuiltinObjectSize(e->getArg(0), type, resType,
+ /*EmittedE=*/nullptr, isDynamic));
+ }
+
case Builtin::BI__builtin_prefetch: {
auto evaluateOperandAsInt = [&](const Expr *arg) {
Expr::EvalResult res;
@@ -641,3 +654,50 @@ mlir::Value CIRGenFunction::emitVAArg(VAArgExpr *ve) {
mlir::Value vaList = emitVAListRef(ve->getSubExpr()).getPointer();
return cir::VAArgOp::create(builder, loc, type, vaList);
}
+
+/// Returns a Value corresponding to the size of the given expression.
+/// This Value may be either of the following:
+///
+/// - Reference an argument if `pass_object_size` is used.
+/// - A call to a `cir.objsize`.
+///
+/// emittedE is the result of emitting `e` as a scalar expr. If it's non-null
+/// and we wouldn't otherwise try to reference a pass_object_size parameter,
+/// we'll call `cir.objsize` on emittedE, rather than emitting e.
+mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type,
+ cir::IntType resType,
+ mlir::Value emittedE,
+ bool isDynamic) {
+ assert(!cir::MissingFeatures::opCallImplicitObjectSizeArgs());
+
+ // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
+ // evaluate e for side-effects. In either case, just like original LLVM
+ // lowering, we shouldn't lower to `cir.objsize`.
+ if (type == 3 || (!emittedE && e->HasSideEffects(getContext())))
+ return builder.getConstInt(getLoc(e->getSourceRange()), resType,
+ (type & 2) ? 0 : -1);
+
+ mlir::Value ptr = emittedE ? emittedE : emitScalarExpr(e);
+ assert(mlir::isa<cir::PointerType>(ptr.getType()) &&
+ "Non-pointer passed to __builtin_object_size?");
+
+ assert(!cir::MissingFeatures::countedBySize());
+
+ // LLVM intrinsics (which CIR lowers to at some point, only supports 0
+ // and 2, account for that right now.
+ const bool min = ((type & 2) != 0);
+ // TODO(cir): Heads up for LLVM lowering, For GCC compatibility,
+ // __builtin_object_size treat NULL as unknown size.
+ auto op = cir::ObjSizeOp::create(builder, getLoc(e->getSourceRange()),
+ resType, ptr, min, isDynamic);
+ return op.getResult();
+}
+
+mlir::Value CIRGenFunction::evaluateOrEmitBuiltinObjectSize(
+ const Expr *e, unsigned type, cir::IntType resType, mlir::Value emittedE,
+ bool isDynamic) {
+ uint64_t objectSize;
+ if (!e->tryEvaluateObjectSize(objectSize, getContext(), type))
+ return emitBuiltinObjectSize(e, type, resType, emittedE, isDynamic);
+ return builder.getConstInt(getLoc(e->getSourceRange()), resType, objectSize);
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index e5cecaa573a6e..3d3d4fa410d1a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1304,6 +1304,14 @@ class CIRGenFunction : public CIRGenTypeCache {
RValue emitBuiltinExpr(const clang::GlobalDecl &gd, unsigned builtinID,
const clang::CallExpr *e, ReturnValueSlot returnValue);
+ mlir::Value emitBuiltinObjectSize(const clang::Expr *e, unsigned type,
+ cir::IntType resType, mlir::Value emittedE,
+ bool isDynamic);
+
+ mlir::Value evaluateOrEmitBuiltinObjectSize(const clang::Expr *e,
+ unsigned type, cir::IntType resType,
+ mlir::Value emittedE, bool isDynamic);
+
RValue emitCall(const CIRGenFunctionInfo &funcInfo,
const CIRGenCallee &callee, ReturnValueSlot returnValue,
const CallArgList &args, cir::CIRCallOpInterface *callOp,
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5a6193fa8d840..6322b2979fa31 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2816,6 +2816,29 @@ static void collectUnreachable(mlir::Operation *parent,
}
}
+mlir::LogicalResult CIRToLLVMObjSizeOpLowering::matchAndRewrite(
+ cir::ObjSizeOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Type llvmResTy = getTypeConverter()->convertType(op.getType());
+ mlir::Location loc = op->getLoc();
+
+ mlir::IntegerType i1Ty = rewriter.getI1Type();
+
+ auto i1Val = [&rewriter, &loc, &i1Ty](bool val) {
+ return mlir::LLVM::ConstantOp::create(rewriter, loc, i1Ty, val);
+ };
+
+ replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm.objectsize", llvmResTy,
+ {
+ adaptor.getPtr(),
+ i1Val(op.getMin()),
+ i1Val(true),
+ i1Val(op.getDynamic()),
+ });
+
+ return mlir::LogicalResult::success();
+}
+
void ConvertCIRToLLVMPass::processCIRAttrs(mlir::ModuleOp module) {
// Lower the module attributes to LLVM equivalents.
if (mlir::Attribute tripleAttr =
diff --git a/clang/test/CIR/CodeGen/builtin-object-size.cpp b/clang/test/CIR/CodeGen/builtin-object-size.cpp
new file mode 100644
index 0000000000000..e077a2b57bf1d
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin-object-size.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+typedef unsigned long size_t;
+
+// CIR-LABEL: @_Z4testPc
+// LLVM-LABEL: define {{.*}} i64 @_Z4testPc
+// OGCG-LABEL: define {{.*}} i64 @_Z4testPc
+size_t test(char *ptr) {
+ // CIR: cir.objsize max {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ return __builtin_object_size(ptr, 0);
+}
+
+// CIR-LABEL: @_Z8test_minPc
+// LLVM-LABEL: define {{.*}} i64 @_Z8test_minPc
+// OGCG-LABEL: define {{.*}} i64 @_Z8test_minPc
+size_t test_min(char *ptr) {
+ // CIR: cir.objsize min {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+ return __builtin_object_size(ptr, 2);
+}
+
+// CIR-LABEL: @_Z17test_dynamic_sizePc
+// LLVM-LABEL: define {{.*}} i64 @_Z17test_dynamic_sizePc
+// OGCG-LABEL: define {{.*}} i64 @_Z17test_dynamic_sizePc
+size_t test_dynamic_size(char *ptr) {
+ // CIR: cir.objsize max dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 true)
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 true)
+ return __builtin_dynamic_object_size(ptr, 0);
+}
>From 4d4fe2ae34d7b8516e71a203fa56bb5ff466ef97 Mon Sep 17 00:00:00 2001
From: Morris Hafner <mhafner at nvidia.com>
Date: Tue, 4 Nov 2025 01:09:13 +0800
Subject: [PATCH 2/3] clang-format, comment updates
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 14 ++++----------
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 17 ++++++++++-------
2 files changed, 14 insertions(+), 17 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 6617895fa8832..ef58b8c592c63 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -655,14 +655,10 @@ mlir::Value CIRGenFunction::emitVAArg(VAArgExpr *ve) {
return cir::VAArgOp::create(builder, loc, type, vaList);
}
-/// Returns a Value corresponding to the size of the given expression.
-/// This Value may be either of the following:
+/// Returns a Value corresponding to the size of the given expression by
+/// emitting a `cir.objsize` operation.
///
-/// - Reference an argument if `pass_object_size` is used.
-/// - A call to a `cir.objsize`.
-///
-/// emittedE is the result of emitting `e` as a scalar expr. If it's non-null
-/// and we wouldn't otherwise try to reference a pass_object_size parameter,
+/// emittedE is the result of emitting `e` as a scalar expr. If it's non-null,
/// we'll call `cir.objsize` on emittedE, rather than emitting e.
mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type,
cir::IntType resType,
@@ -675,7 +671,7 @@ mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type,
// lowering, we shouldn't lower to `cir.objsize`.
if (type == 3 || (!emittedE && e->HasSideEffects(getContext())))
return builder.getConstInt(getLoc(e->getSourceRange()), resType,
- (type & 2) ? 0 : -1);
+ (type & 2) ? 0 : -1);
mlir::Value ptr = emittedE ? emittedE : emitScalarExpr(e);
assert(mlir::isa<cir::PointerType>(ptr.getType()) &&
@@ -686,8 +682,6 @@ mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type,
// LLVM intrinsics (which CIR lowers to at some point, only supports 0
// and 2, account for that right now.
const bool min = ((type & 2) != 0);
- // TODO(cir): Heads up for LLVM lowering, For GCC compatibility,
- // __builtin_object_size treat NULL as unknown size.
auto op = cir::ObjSizeOp::create(builder, getLoc(e->getSourceRange()),
resType, ptr, min, isDynamic);
return op.getResult();
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 6322b2979fa31..470c22631d8e5 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2828,13 +2828,16 @@ mlir::LogicalResult CIRToLLVMObjSizeOpLowering::matchAndRewrite(
return mlir::LLVM::ConstantOp::create(rewriter, loc, i1Ty, val);
};
- replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm.objectsize", llvmResTy,
- {
- adaptor.getPtr(),
- i1Val(op.getMin()),
- i1Val(true),
- i1Val(op.getDynamic()),
- });
+ replaceOpWithCallLLVMIntrinsicOp(
+ rewriter, op, "llvm.objectsize", llvmResTy,
+ {
+ adaptor.getPtr(),
+ i1Val(op.getMin()),
+ // For GCC compatibility, __builtin_object_size treat NULL as unknown
+ // size.
+ i1Val(true),
+ i1Val(op.getDynamic()),
+ });
return mlir::LogicalResult::success();
}
>From 2dc7585dd6a1b80d4cef03c0144b8fa6127c50c2 Mon Sep 17 00:00:00 2001
From: Morris Hafner <mhafner at nvidia.com>
Date: Tue, 4 Nov 2025 01:10:19 +0800
Subject: [PATCH 3/3] more clang-format
---
clang/lib/CIR/CodeGen/CIRGenFunction.h | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 3d3d4fa410d1a..115fa6dba2aeb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1305,12 +1305,14 @@ class CIRGenFunction : public CIRGenTypeCache {
const clang::CallExpr *e, ReturnValueSlot returnValue);
mlir::Value emitBuiltinObjectSize(const clang::Expr *e, unsigned type,
- cir::IntType resType, mlir::Value emittedE,
- bool isDynamic);
+ cir::IntType resType, mlir::Value emittedE,
+ bool isDynamic);
mlir::Value evaluateOrEmitBuiltinObjectSize(const clang::Expr *e,
- unsigned type, cir::IntType resType,
- mlir::Value emittedE, bool isDynamic);
+ unsigned type,
+ cir::IntType resType,
+ mlir::Value emittedE,
+ bool isDynamic);
RValue emitCall(const CIRGenFunctionInfo &funcInfo,
const CIRGenCallee &callee, ReturnValueSlot returnValue,
More information about the cfe-commits
mailing list