[clang] [CIR] Upstream pointer subtraction handling (PR #163306)

Tue Oct 14 13:31:42 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-clangir

Author: Shawn K (kimsh02)

<details>
<summary>Changes</summary>

Fix #162360

---
Full diff: https://github.com/llvm/llvm-project/pull/163306.diff


6 Files Affected:

- (modified) clang/include/clang/CIR/Dialect/IR/CIROps.td (+31) 
- (modified) clang/include/clang/CIR/MissingFeatures.h (+1) 
- (modified) clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp (+3-3) 
- (modified) clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp (+43) 
- (added) clang/test/CIR/CodeGen/ptrdiff.c (+35) 
- (added) clang/test/CIR/CodeGen/ptrdiff.cpp (+13) 


``````````diff

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 4c15d9ed0f834..56ff289f91a93 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4044,6 +4044,37 @@ def CIR_ExpectOp : CIR_Op<"expect", [
   }];
 }
 
+//===----------------------------------------------------------------------===//
+// PtrDiffOp
+//===----------------------------------------------------------------------===//
+
+def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> {
+  let summary = "Pointer subtraction arithmetic";
+  let description = [{
+    The cir.ptr_diff operation computes the difference between two pointers that
+    have the same element type
+
+    The result reflects the ABI-defined size of the pointed-to type. For example,
+    subtracting two !cir.ptr<!u64i> values may yield 1, representing an 8-byte
+    difference. In contrast, for pointers to void or function types, a result of
+    8 corresponds to an 8-byte difference.
+
+    Example:
+
+    ```mlir
+    %7 = cir.ptr_diff %0, %1 : !cir.ptr<!u64i> -> !u64i
+    ```
+  }];
+
+  let arguments = (ins CIR_PointerType:$lhs, CIR_PointerType:$rhs);
+  let results = (outs CIR_AnyFundamentalIntType:$result);
+
+  let assemblyFormat = [{
+    $lhs `,` $rhs  `:` qualified(type($lhs)) `->` qualified(type($result)) 
+    attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // Floating Point Ops
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 4fbae150b587e..d133453c137fb 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -322,6 +322,7 @@ struct MissingFeatures {
   static bool invokeOp() { return false; }
   static bool labelOp() { return false; }
   static bool ptrDiffOp() { return false; }
+  static bool llvmLoweringPtrDiffConsidersPointee() {return false; }
   static bool ptrStrideOp() { return false; }
   static bool switchOp() { return false; }
   static bool throwOp() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 637f9ef65c88f..8889bbeebe399 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -1734,9 +1734,9 @@ mlir::Value ScalarExprEmitter::emitSub(const BinOpInfo &ops) {
   // LLVM we shall take VLA's, division by element size, etc.
   //
   // See more in `EmitSub` in CGExprScalar.cpp.
-  assert(!cir::MissingFeatures::ptrDiffOp());
-  cgf.cgm.errorNYI("ptrdiff");
-  return {};
+  assert(!cir::MissingFeatures::llvmLoweringPtrDiffConsidersPointee());
+  return builder.create<cir::PtrDiffOp>(cgf.getLoc(ops.loc), cgf.PtrDiffTy,
+                                        ops.lhs, ops.rhs);
 }
 
 mlir::Value ScalarExprEmitter::emitShl(const BinOpInfo &ops) {
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index f0d73ac872386..4b8d912409a8e 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1499,6 +1499,49 @@ mlir::LogicalResult CIRToLLVMConstantOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+static uint64_t getTypeSize(mlir::Type type,
+                                                 mlir::Operation &op)  {
+  mlir::DataLayout layout(op.getParentOfType<mlir::ModuleOp>());
+  // For LLVM purposes we treat void as u8.
+  if (isa<cir::VoidType>(type))
+    type = cir::IntType::get(type.getContext(), 8, /*isSigned=*/false);
+  return llvm::divideCeil(layout.getTypeSizeInBits(type), 8);
+}
+
+mlir::LogicalResult CIRToLLVMPtrDiffOpLowering::matchAndRewrite(
+    cir::PtrDiffOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  auto dstTy = mlir::cast<cir::IntType>(op.getType());
+  auto llvmDstTy = getTypeConverter()->convertType(dstTy);
+
+  auto lhs = rewriter.create<mlir::LLVM::PtrToIntOp>(op.getLoc(), llvmDstTy,
+                                                     adaptor.getLhs());
+  auto rhs = rewriter.create<mlir::LLVM::PtrToIntOp>(op.getLoc(), llvmDstTy,
+                                                     adaptor.getRhs());
+
+  auto diff =
+      rewriter.create<mlir::LLVM::SubOp>(op.getLoc(), llvmDstTy, lhs, rhs);
+
+  cir::PointerType ptrTy = op.getLhs().getType();
+  auto typeSize = getTypeSize(ptrTy.getPointee(), *op);
+
+  // Avoid silly division by 1.
+  auto resultVal = diff.getResult();
+  if (typeSize != 1) {
+    auto typeSizeVal = rewriter.create<mlir::LLVM::ConstantOp>(
+        op.getLoc(), llvmDstTy, typeSize);
+
+    if (dstTy.isUnsigned())
+      resultVal =
+          rewriter.create<mlir::LLVM::UDivOp>(op.getLoc(), diff, typeSizeVal);
+    else
+      resultVal =
+          rewriter.create<mlir::LLVM::SDivOp>(op.getLoc(), diff, typeSizeVal);
+  }
+  rewriter.replaceOp(op, resultVal);
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMExpectOpLowering::matchAndRewrite(
     cir::ExpectOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/clang/test/CIR/CodeGen/ptrdiff.c b/clang/test/CIR/CodeGen/ptrdiff.c
new file mode 100644
index 0000000000000..33a0c3ad4faf4
--- /dev/null
+++ b/clang/test/CIR/CodeGen/ptrdiff.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+int addrcmp(const void* a, const void* b) {
+  // CIR-LABEL: addrcmp
+  // CIR: %[[R:.*]] = cir.ptr_diff
+  // CIR: cir.cast integral %[[R]] : !s64i -> !s32i
+
+  // LLVM-LABEL: define dso_local i32 @addrcmp(
+  // LLVM: %[[PTR_A:.*]] = ptrtoint ptr {{.*}} to i64
+  // LLVM: %[[PTR_B:.*]] = ptrtoint ptr {{.*}} to i64
+  // LLVM: %[[SUB:.*]] = sub i64 %[[PTR_A]], %[[PTR_B]]
+  // LLVM-NOT: sdiv
+  // LLVM: trunc i64 %[[SUB]] to i32
+  return *(const void**)a - *(const void**)b;
+}
+
+unsigned long long test_ptr_diff(int *a, int* b) {
+  // CIR-LABEL: test_ptr_diff
+  // CIR: %[[D:.*]] = cir.ptr_diff {{.*}} : !cir.ptr<!s32i> -> !s64i
+  // CIR: %[[U:.*]] = cir.cast integral %[[D]] : !s64i -> !u64i
+  // CIR: cir.return {{.*}} : !u64i
+
+  // LLVM-LABEL: define dso_local i64 @test_ptr_diff(
+  // LLVM: %[[IA:.*]] = ptrtoint ptr %{{.*}} to i64
+  // LLVM: %[[IB:.*]] = ptrtoint ptr %{{.*}} to i64
+  // LLVM: %[[SUB:.*]] = sub i64 %[[IA]], %[[IB]]
+  // LLVM: %[[Q:.*]] = sdiv{{( exact)?}} i64 %[[SUB]], 4
+  // LLVM: store i64 %[[Q]], ptr %[[RETADDR:.*]], align
+  // LLVM: %[[RETLOAD:.*]] = load i64, ptr %[[RETADDR]], align
+  // LLVM: ret i64 %[[RETLOAD]]
+  return a - b;
+}
diff --git a/clang/test/CIR/CodeGen/ptrdiff.cpp b/clang/test/CIR/CodeGen/ptrdiff.cpp
new file mode 100644
index 0000000000000..2c0aed6089816
--- /dev/null
+++ b/clang/test/CIR/CodeGen/ptrdiff.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+typedef unsigned long size_type;
+
+size_type size(unsigned long *_start, unsigned long *_finish) {
+  // CIR-LABEL: cir.func dso_local @_Z4sizePmS_
+  // CIR: %[[D:.*]] = cir.ptr_diff {{.*}} : !cir.ptr<!u64i> -> !s64i
+  // CIR: %[[U:.*]] = cir.cast integral %[[D]] : !s64i -> !u64i
+  // CIR: cir.return {{.*}} : !u64i
+
+  return static_cast<size_type>(_finish - _start);
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/163306