[clang] [CIR] Upstream pointer arithmetic support (PR #138041)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 30 15:00:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clangir
Author: Andy Kaylor (andykaylor)
<details>
<summary>Changes</summary>
This adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values.
---
Full diff: https://github.com/llvm/llvm-project/pull/138041.diff
3 Files Affected:
- (modified) clang/lib/CIR/CodeGen/CIRGenBuilder.h (+9)
- (modified) clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp (+86-5)
- (added) clang/test/CIR/CodeGen/pointers.cpp (+73)
``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index 5f3bf8b416cde..7d16d70564227 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -185,6 +185,15 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
}
bool isInt(mlir::Type i) { return mlir::isa<cir::IntType>(i); }
+ //
+ // Constant creation helpers
+ // -------------------------
+ //
+ cir::ConstantOp getSInt32(int32_t c, mlir::Location loc) {
+ auto sInt32Ty = getSInt32Ty();
+ return create<cir::ConstantOp>(loc, cir::IntAttr::get(sInt32Ty, c));
+ }
+
// Creates constant nullptr for pointer type ty.
cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) {
assert(!cir::MissingFeatures::targetCodeGenInfoGetNullPointer());
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index aef5b125a2877..9fc9bbc3e0c93 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -388,9 +388,26 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
// NOTE(CIR): clang calls CreateAdd but folds this to a unary op
value = emitUnaryOp(e, kind, input, /*nsw=*/false);
}
- } else if (isa<PointerType>(type)) {
- cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec pointer");
- return {};
+ } else if (const PointerType *ptr = type->getAs<PointerType>()) {
+ QualType type = ptr->getPointeeType();
+ if (cgf.getContext().getAsVariableArrayType(type)) {
+ // VLA types don't have constant size.
+ cgf.cgm.errorNYI(e->getSourceRange(), "Pointer arithmetic on VLA");
+ return {};
+ } else if (type->isFunctionType()) {
+ // Arithmetic on function pointers (!) is just +-1.
+ cgf.cgm.errorNYI(e->getSourceRange(),
+ "Pointer arithmetic on function pointer");
+ return {};
+ } else {
+ // For everything else, we can just do a simple increment.
+ mlir::Location loc = cgf.getLoc(e->getSourceRange());
+ CIRGenBuilderTy &builder = cgf.getBuilder();
+ int amount = (isInc ? 1 : -1);
+ mlir::Value amt = builder.getSInt32(amount, loc);
+ assert(!cir::MissingFeatures::sanitizers());
+ value = builder.createPtrStride(loc, value, amt);
+ }
} else if (type->isVectorType()) {
cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec vector");
return {};
@@ -1152,8 +1169,72 @@ getUnwidenedIntegerType(const ASTContext &astContext, const Expr *e) {
static mlir::Value emitPointerArithmetic(CIRGenFunction &cgf,
const BinOpInfo &op,
bool isSubtraction) {
- cgf.cgm.errorNYI(op.loc, "pointer arithmetic");
- return {};
+ // Must have binary (not unary) expr here. Unary pointer
+ // increment/decrement doesn't use this path.
+ const BinaryOperator *expr = cast<BinaryOperator>(op.e);
+
+ mlir::Value pointer = op.lhs;
+ Expr *pointerOperand = expr->getLHS();
+ mlir::Value index = op.rhs;
+ Expr *indexOperand = expr->getRHS();
+
+ // In a subtraction, the LHS is always the pointer.
+ if (!isSubtraction && !mlir::isa<cir::PointerType>(pointer.getType())) {
+ std::swap(pointer, index);
+ std::swap(pointerOperand, indexOperand);
+ }
+
+ // Some versions of glibc and gcc use idioms (particularly in their malloc
+ // routines) that add a pointer-sized integer (known to be a pointer value)
+ // to a null pointer in order to cast the value back to an integer or as
+ // part of a pointer alignment algorithm. This is undefined behavior, but
+ // we'd like to be able to compile programs that use it.
+ //
+ // Normally, we'd generate a GEP with a null-pointer base here in response
+ // to that code, but it's also UB to dereference a pointer created that
+ // way. Instead (as an acknowledged hack to tolerate the idiom) we will
+ // generate a direct cast of the integer value to a pointer.
+ //
+ // The idiom (p = nullptr + N) is not met if any of the following are true:
+ //
+ // The operation is subtraction.
+ // The index is not pointer-sized.
+ // The pointer type is not byte-sized.
+ //
+ if (BinaryOperator::isNullPointerArithmeticExtension(
+ cgf.getContext(), op.opcode, expr->getLHS(), expr->getRHS()))
+ return cgf.getBuilder().createIntToPtr(index, pointer.getType());
+
+ // Differently from LLVM codegen, ABI bits for index sizes is handled during
+ // LLVM lowering.
+
+ // If this is subtraction, negate the index.
+ if (isSubtraction)
+ index = cgf.getBuilder().createNeg(index);
+
+ assert(!cir::MissingFeatures::sanitizers());
+
+ const PointerType *pointerType =
+ pointerOperand->getType()->getAs<PointerType>();
+ if (!pointerType) {
+ cgf.cgm.errorNYI("Objective-C:pointer arithmetic with non-pointer type");
+ return nullptr;
+ }
+
+ QualType elementType = pointerType->getPointeeType();
+ if (cgf.getContext().getAsVariableArrayType(elementType)) {
+ cgf.cgm.errorNYI("variable array type");
+ return nullptr;
+ }
+
+ if (elementType->isVoidType() || elementType->isFunctionType()) {
+ cgf.cgm.errorNYI("void* or function pointer arithmetic");
+ return nullptr;
+ }
+
+ assert(!cir::MissingFeatures::sanitizers());
+ return cgf.getBuilder().create<cir::PtrStrideOp>(
+ cgf.getLoc(op.e->getExprLoc()), pointer.getType(), pointer, index);
}
mlir::Value ScalarExprEmitter::emitMul(const BinOpInfo &ops) {
diff --git a/clang/test/CIR/CodeGen/pointers.cpp b/clang/test/CIR/CodeGen/pointers.cpp
new file mode 100644
index 0000000000000..c791356e15bb6
--- /dev/null
+++ b/clang/test/CIR/CodeGen/pointers.cpp
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Should generate basic pointer arithmetics.
+void foo(int *iptr, char *cptr, unsigned ustride) {
+ iptr + 2;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
+ cptr + 3;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s8i>
+ iptr - 2;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
+ // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i>
+ cptr - 3;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
+ // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s8i>
+ iptr + ustride;
+ // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !u32i), !cir.ptr<!s32i>
+
+ // Must convert unsigned stride to a signed one.
+ iptr - ustride;
+ // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i
+ // CHECK: %[[#SIGNSTRIDE:]] = cir.cast(integral, %[[#STRIDE]] : !u32i), !s32i
+ // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i>
+
+ iptr++;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
+
+ iptr--;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<-1> : !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
+}
+
+void testPointerSubscriptAccess(int *ptr) {
+// CHECK: testPointerSubscriptAccess
+ ptr[1];
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
+ // CHECK: %[[#PTR:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+ // CHECK: cir.ptr_stride(%[[#PTR]] : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
+}
+
+void testPointerMultiDimSubscriptAccess(int **ptr) {
+// CHECK: testPointerMultiDimSubscriptAccess
+ ptr[1][2];
+ // CHECK: %[[#STRIDE2:]] = cir.const #cir.int<2> : !s32i
+ // CHECK: %[[#STRIDE1:]] = cir.const #cir.int<1> : !s32i
+ // CHECK: %[[#PTR1:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>, !cir.ptr<!cir.ptr<!s32i>>
+ // CHECK: %[[#PTR2:]] = cir.ptr_stride(%[[#PTR1]] : !cir.ptr<!cir.ptr<!s32i>>, %[[#STRIDE1]] : !s32i), !cir.ptr<!cir.ptr<!s32i>>
+ // CHECK: %[[#PTR3:]] = cir.load %[[#PTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+ // CHECK: cir.ptr_stride(%[[#PTR3]] : !cir.ptr<!s32i>, %[[#STRIDE2]] : !s32i), !cir.ptr<!s32i>
+}
+
+// This test is meant to verify code that handles the 'p = nullptr + n' idiom
+// used by some versions of glibc and gcc. This is undefined behavior but
+// it is intended there to act like a conversion from a pointer-sized integer
+// to a pointer, and we would like to tolerate that.
+
+#define NULLPTRINT ((int*)0)
+
+// This should get the inttoptr instruction.
+int *testGnuNullPtrArithmetic(unsigned n) {
+// CHECK: testGnuNullPtrArithmetic
+ return NULLPTRINT + n;
+ // CHECK: %[[NULLPTR:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+ // CHECK: %[[N:.*]] = cir.load %{{.*}} : !cir.ptr<!u32i>, !u32i
+ // CHECK: %[[RESULT:.*]] = cir.ptr_stride(%[[NULLPTR]] : !cir.ptr<!s32i>, %[[N]] : !u32i), !cir.ptr<!s32i>
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/138041
More information about the cfe-commits
mailing list