[clang] [CIR] Upstream support for iterator-based range for loops (PR #140636)
via cfe-commits
cfe-commits at lists.llvm.org
Mon May 19 15:28:07 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Andy Kaylor (andykaylor)
This change adds handling for C++ member operator calls, implicit no-op casts, and l-value call expressions. Together, these changes enable handling of range-based for loops that use iterators.
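For illustration only (not part of the patch), this is the shape of iterator-based range-for loop that CIR codegen can now handle; it mirrors the `for_range3` case added to `clang/test/CIR/CodeGen/forrange.cpp` in the diff below:

```c++
// Illustrative sketch mirroring the new for_range3 test case. The loop needs
// member operator calls (operator!=, operator*, operator++), an l-value call
// expression, and an implicit no-op cast to be handled by CIR codegen.
// Declarations only; definitions are not required for IR generation.
struct Element {};

class Iterator {
public:
  Element &operator*();
  Iterator &operator++();
  bool operator!=(const Iterator &other) const;
};

class C3 {
public:
  Iterator begin();
  Iterator end();
};

void for_range3() {
  C3 c;
  for (Element &e : c)
    ;
}
```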
---
Full diff: https://github.com/llvm/llvm-project/pull/140636.diff
5 Files Affected:
- (modified) clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp (+16-3)
- (modified) clang/lib/CIR/CodeGen/CIRGenExpr.cpp (+139-4)
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.cpp (+22)
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+9)
- (modified) clang/test/CIR/CodeGen/forrange.cpp (+84)
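For context, the loop above desugars roughly as sketched below (the standard range-for rewrite, reusing the `C3`/`Iterator` declarations from the earlier sketch); the `__range1`/`__begin1`/`__end1` names correspond to the compiler-generated variables that appear in the CIR test checks:

```c++
// Approximate desugaring of `for (Element &e : c)` when begin()/end() return
// an iterator class (the for_range3 case in the new test).
void for_range3_desugared(C3 c) {
  {
    auto &&__range1 = c;
    auto __begin1 = __range1.begin();        // call to C3::begin()
    auto __end1 = __range1.end();            // call to C3::end()
    for (; __begin1 != __end1; ++__begin1) { // Iterator::operator!=, operator++
      Element &e = *__begin1;                // Iterator::operator* (l-value call)
      ;                                      // empty loop body
    }
  }
}
```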
``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp
index 906c212f0fa8a..33865728e4cdc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp
@@ -98,9 +98,11 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorMemberCallExpr(
CallArgList rtlArgStorage;
CallArgList *rtlArgs = nullptr;
if (auto *oce = dyn_cast<CXXOperatorCallExpr>(ce)) {
- cgm.errorNYI(oce->getSourceRange(),
- "emitCXXMemberOrOperatorMemberCallExpr: operator call");
- return RValue::get(nullptr);
+ if (oce->isAssignmentOp()) {
+ cgm.errorNYI(
+ oce->getSourceRange(),
+ "emitCXXMemberOrOperatorMemberCallExpr: assignment operator");
+ }
}
LValue thisPtr;
@@ -169,6 +171,17 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorMemberCallExpr(
/*ImplicitParam=*/nullptr, QualType(), ce, rtlArgs);
}
+RValue
+CIRGenFunction::emitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *e,
+ const CXXMethodDecl *md,
+ ReturnValueSlot returnValue) {
+ assert(md->isInstance() &&
+ "Trying to emit a member call expr on a static method!");
+ return emitCXXMemberOrOperatorMemberCallExpr(
+ e, md, returnValue, /*HasQualifier=*/false, /*Qualifier=*/nullptr,
+ /*IsArrow=*/false, e->getArg(0));
+}
+
RValue CIRGenFunction::emitCXXMemberOrOperatorCall(
const CXXMethodDecl *md, const CIRGenCallee &callee,
ReturnValueSlot returnValue, mlir::Value thisPtr, mlir::Value implicitParam,
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index b8e644d80d747..2b97d45a84d64 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -743,6 +743,122 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
return lv;
}
+/// Casts are never lvalues unless the cast is to a reference type. If the cast
+/// is to a reference, we can have the usual lvalue result; otherwise, if a cast
+/// is needed by the code generator in an lvalue context, it must mean that we
+/// need the address of an aggregate in order to access one of its members.
+/// This can happen for all the reasons that casts are permitted with aggregate
+/// results, including no-op aggregate casts and casts from scalar to union.
+LValue CIRGenFunction::emitCastLValue(const CastExpr *e) {
+ switch (e->getCastKind()) {
+ case CK_ToVoid:
+ case CK_BitCast:
+ case CK_LValueToRValueBitCast:
+ case CK_ArrayToPointerDecay:
+ case CK_FunctionToPointerDecay:
+ case CK_NullToMemberPointer:
+ case CK_NullToPointer:
+ case CK_IntegralToPointer:
+ case CK_PointerToIntegral:
+ case CK_PointerToBoolean:
+ case CK_IntegralCast:
+ case CK_BooleanToSignedIntegral:
+ case CK_IntegralToBoolean:
+ case CK_IntegralToFloating:
+ case CK_FloatingToIntegral:
+ case CK_FloatingToBoolean:
+ case CK_FloatingCast:
+ case CK_FloatingRealToComplex:
+ case CK_FloatingComplexToReal:
+ case CK_FloatingComplexToBoolean:
+ case CK_FloatingComplexCast:
+ case CK_FloatingComplexToIntegralComplex:
+ case CK_IntegralRealToComplex:
+ case CK_IntegralComplexToReal:
+ case CK_IntegralComplexToBoolean:
+ case CK_IntegralComplexCast:
+ case CK_IntegralComplexToFloatingComplex:
+ case CK_DerivedToBaseMemberPointer:
+ case CK_BaseToDerivedMemberPointer:
+ case CK_MemberPointerToBoolean:
+ case CK_ReinterpretMemberPointer:
+ case CK_AnyPointerToBlockPointerCast:
+ case CK_ARCProduceObject:
+ case CK_ARCConsumeObject:
+ case CK_ARCReclaimReturnedObject:
+ case CK_ARCExtendBlockObject:
+ case CK_CopyAndAutoreleaseBlockObject:
+ case CK_IntToOCLSampler:
+ case CK_FloatingToFixedPoint:
+ case CK_FixedPointToFloating:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
+ case CK_FixedPointToIntegral:
+ case CK_IntegralToFixedPoint:
+ case CK_MatrixCast:
+ case CK_HLSLVectorTruncation:
+ case CK_HLSLArrayRValue:
+ case CK_HLSLElementwiseCast:
+ case CK_HLSLAggregateSplatCast:
+ llvm_unreachable("unexpected cast lvalue");
+
+ case CK_Dependent:
+ llvm_unreachable("dependent cast kind in IR gen!");
+
+ case CK_BuiltinFnToFnPtr:
+ llvm_unreachable("builtin functions are handled elsewhere");
+
+ // These are never l-values; just use the aggregate emission code.
+ case CK_NonAtomicToAtomic:
+ case CK_AtomicToNonAtomic:
+ case CK_Dynamic:
+ case CK_UncheckedDerivedToBase:
+ case CK_DerivedToBase:
+ case CK_ToUnion:
+ case CK_BaseToDerived:
+ case CK_LValueBitCast:
+ case CK_AddressSpaceConversion:
+ case CK_ObjCObjectLValueCast:
+ case CK_VectorSplat:
+ case CK_ConstructorConversion:
+ case CK_UserDefinedConversion:
+ case CK_CPointerToObjCPointerCast:
+ case CK_BlockPointerToObjCPointerCast:
+ case CK_LValueToRValue: {
+ cgm.errorNYI(e->getSourceRange(),
+ std::string("emitCastLValue for unhandled cast kind: ") +
+ e->getCastKindName());
+
+ return {};
+ }
+
+ case CK_NoOp: {
+ // CK_NoOp can model a qualification conversion, which can remove an array
+ // bound and change the IR type.
+ LValue lv = emitLValue(e->getSubExpr());
+ // Propagate the volatile qualifier to LValue, if it exists in e.
+ if (e->changesVolatileQualification())
+ cgm.errorNYI(e->getSourceRange(),
+ "emitCastLValue: NoOp changes volatile qual");
+ if (lv.isSimple()) {
+ Address v = lv.getAddress();
+ if (v.isValid()) {
+ mlir::Type ty = convertTypeForMem(e->getType());
+ if (v.getElementType() != ty)
+ cgm.errorNYI(e->getSourceRange(),
+ "emitCastLValue: NoOp needs bitcast");
+ }
+ }
+ return lv;
+ }
+
+ case CK_ZeroToOCLOpaqueType:
+ llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid");
+ }
+
+ llvm_unreachable("Invalid cast kind");
+}
+
LValue CIRGenFunction::emitMemberExpr(const MemberExpr *e) {
if (isa<VarDecl>(e->getMemberDecl())) {
cgm.errorNYI(e->getSourceRange(), "emitMemberExpr: VarDecl");
@@ -785,6 +901,21 @@ LValue CIRGenFunction::emitMemberExpr(const MemberExpr *e) {
llvm_unreachable("Unhandled member declaration!");
}
+LValue CIRGenFunction::emitCallExprLValue(const CallExpr *e) {
+ RValue rv = emitCallExpr(e);
+
+ if (!rv.isScalar()) {
+ cgm.errorNYI(e->getSourceRange(), "emitCallExprLValue: non-scalar return");
+ return {};
+ }
+
+ assert(e->getCallReturnType(getContext())->isReferenceType() &&
+ "Can't have a scalar return unless the return type is a "
+ "reference type!");
+
+ return makeNaturalAlignPointeeAddrLValue(rv.getScalarVal(), e->getType());
+}
+
LValue CIRGenFunction::emitBinaryOperatorLValue(const BinaryOperator *e) {
// Comma expressions just emit their LHS then their RHS as an l-value.
if (e->getOpcode() == BO_Comma) {
@@ -954,10 +1085,14 @@ RValue CIRGenFunction::emitCallExpr(const clang::CallExpr *e,
}
if (const auto *operatorCall = dyn_cast<CXXOperatorCallExpr>(e)) {
- if (isa_and_nonnull<CXXMethodDecl>(operatorCall->getCalleeDecl())) {
- cgm.errorNYI(e->getSourceRange(), "call to member operator");
- return RValue::get(nullptr);
- }
+ // If the callee decl is a CXXMethodDecl, we need to emit this as a C++
+ // operator member call.
+ if (const CXXMethodDecl *md =
+ dyn_cast_or_null<CXXMethodDecl>(operatorCall->getCalleeDecl()))
+ return emitCXXOperatorMemberCallExpr(operatorCall, md, returnValue);
+ // A CXXOperatorCallExpr is created even for explicit object methods, but
+ // these should be treated like static function calls. Fall through to do
+ // that.
}
CIRGenCallee callee = emitCallee(e->getCallee());
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 6bfad71f241dc..c3798de79d969 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -476,6 +476,18 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
return fn;
}
+/// Given a value of type T* that may not be to a complete object, construct
+/// an l-value with the natural pointee alignment of T.
+LValue CIRGenFunction::makeNaturalAlignPointeeAddrLValue(mlir::Value val,
+ QualType ty) {
+ // FIXME(cir): is it safe to assume Op->getResult(0) is valid? Perhaps
+ // assert on the result type first.
+ LValueBaseInfo baseInfo;
+ assert(!cir::MissingFeatures::opTBAA());
+ CharUnits align = cgm.getNaturalTypeAlignment(ty, &baseInfo);
+ return makeAddrLValue(Address(val, align), ty, baseInfo);
+}
+
clang::QualType CIRGenFunction::buildFunctionArgList(clang::GlobalDecl gd,
FunctionArgList &args) {
const auto *fd = cast<FunctionDecl>(gd.getDecl());
@@ -536,10 +548,20 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
"CompoundAssignOperator with ComplexType");
return LValue();
}
+ case Expr::CallExprClass:
+ case Expr::CXXMemberCallExprClass:
+ case Expr::CXXOperatorCallExprClass:
+ case Expr::UserDefinedLiteralClass:
+ return emitCallExprLValue(cast<CallExpr>(e));
case Expr::ParenExprClass:
return emitLValue(cast<ParenExpr>(e)->getSubExpr());
case Expr::DeclRefExprClass:
return emitDeclRefLValue(cast<DeclRefExpr>(e));
+ case Expr::CStyleCastExprClass:
+ case Expr::CXXStaticCastExprClass:
+ case Expr::CXXDynamicCastExprClass:
+ case Expr::ImplicitCastExprClass:
+ return emitCastLValue(cast<CastExpr>(e));
}
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index dbcc6ad832b34..4921763649039 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -282,6 +282,8 @@ class CIRGenFunction : public CIRGenTypeCache {
// TODO: Add symbol table support
}
+ LValue makeNaturalAlignPointeeAddrLValue(mlir::Value v, clang::QualType t);
+
/// Construct an address with the natural alignment of T. If a pointer to T
/// is expected to be signed, the pointer passed to this function must have
/// been signed, and the returned Address will have the pointer authentication
@@ -515,6 +517,7 @@ class CIRGenFunction : public CIRGenTypeCache {
AbstractCallee callee = AbstractCallee(), unsigned paramsToSkip = 0);
RValue emitCallExpr(const clang::CallExpr *e,
ReturnValueSlot returnValue = ReturnValueSlot());
+ LValue emitCallExprLValue(const clang::CallExpr *e);
CIRGenCallee emitCallee(const clang::Expr *e);
template <typename T>
@@ -527,6 +530,8 @@ class CIRGenFunction : public CIRGenTypeCache {
mlir::Type condType,
bool buildingTopLevelCase);
+ LValue emitCastLValue(const CastExpr *e);
+
LValue emitCompoundAssignmentLValue(const clang::CompoundAssignOperator *e);
mlir::LogicalResult emitContinueStmt(const clang::ContinueStmt &s);
@@ -549,6 +554,10 @@ class CIRGenFunction : public CIRGenTypeCache {
clang::NestedNameSpecifier *qualifier, bool isArrow,
const clang::Expr *base);
+ RValue emitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *e,
+ const CXXMethodDecl *md,
+ ReturnValueSlot returnValue);
+
mlir::LogicalResult emitDoStmt(const clang::DoStmt &s);
/// Emit an expression as an initializer for an object (variable, field, etc.)
diff --git a/clang/test/CIR/CodeGen/forrange.cpp b/clang/test/CIR/CodeGen/forrange.cpp
index 80b936318334c..8a3570a35a325 100644
--- a/clang/test/CIR/CodeGen/forrange.cpp
+++ b/clang/test/CIR/CodeGen/forrange.cpp
@@ -47,3 +47,87 @@ void for_range() {
// CIR: cir.yield
// CIR: }
// CIR: }
+
+struct C2 {
+ Element *begin();
+ Element *end();
+};
+
+void for_range2() {
+ C2 c;
+ for (Element &e : c)
+ ;
+}
+
+// CIR: cir.func @_Z10for_range2v()
+// CIR: %[[C_ADDR:.*]] = cir.alloca !rec_C2{{.*}} ["c"]
+// CIR: cir.scope {
+// CIR: %[[RANGE_ADDR:.*]] = cir.alloca !cir.ptr<!rec_C2>{{.*}} ["__range1", init, const]
+// CIR: %[[BEGIN_ADDR:.*]] = cir.alloca !cir.ptr<!rec_Element>{{.*}} ["__begin1", init]
+// CIR: %[[END_ADDR:.*]] = cir.alloca !cir.ptr<!rec_Element>{{.*}} ["__end1", init]
+// CIR: %[[E_ADDR:.*]] = cir.alloca !cir.ptr<!rec_Element>{{.*}} ["e", init, const]
+// CIR: cir.store %[[C_ADDR]], %[[RANGE_ADDR]]
+// CIR: %[[C_REF:.*]] = cir.load %[[RANGE_ADDR]]
+// CIR: %[[BEGIN:.*]] = cir.call @_ZN2C25beginEv(%[[C_REF]])
+// CIR: cir.store %[[BEGIN]], %[[BEGIN_ADDR]]
+// CIR: %[[C_REF2:.*]] = cir.load %[[RANGE_ADDR]]
+// CIR: %[[END:.*]] = cir.call @_ZN2C23endEv(%[[C_REF2]])
+// CIR: cir.store %[[END]], %[[END_ADDR]]
+// CIR: cir.for : cond {
+// CIR: %[[BEGIN:.*]] = cir.load %[[BEGIN_ADDR]]
+// CIR: %[[END:.*]] = cir.load %[[END_ADDR]]
+// CIR: %[[CMP:.*]] = cir.cmp(ne, %[[BEGIN]], %[[END]])
+// CIR: cir.condition(%[[CMP]])
+// CIR: } body {
+// CIR: %[[E:.*]] = cir.load deref %[[BEGIN_ADDR]]
+// CIR: cir.store %[[E]], %[[E_ADDR]]
+// CIR: cir.yield
+// CIR: } step {
+// CIR: %[[BEGIN:.*]] = cir.load %[[BEGIN_ADDR]]
+// CIR: %[[STEP:.*]] = cir.const #cir.int<1>
+// CIR: %[[NEXT:.*]] = cir.ptr_stride(%[[BEGIN]] {{.*}}, %[[STEP]] {{.*}})
+// CIR: cir.store %[[NEXT]], %[[BEGIN_ADDR]]
+// CIR: cir.yield
+// CIR: }
+// CIR: }
+
+// Iterator class definition
+class Iterator {
+public:
+ Element& operator*();
+ Iterator& operator++();
+ bool operator!=(const Iterator& other) const;
+};
+
+class C3 {
+public:
+ Iterator begin();
+ Iterator end();
+};
+
+void for_range3() {
+ C3 c;
+ for (Element& e : c)
+ ;
+}
+
+// CIR: cir.func @_Z10for_range3v()
+// CIR: %[[C_ADDR:.*]] = cir.alloca !rec_C3{{.*}} ["c"]
+// CIR: cir.scope {
+// CIR: %[[RANGE_ADDR:.*]] = cir.alloca !cir.ptr<!rec_C3>{{.*}} ["__range1", init, const]
+// CIR: %[[BEGIN_ADDR:.*]] = cir.alloca !rec_Iterator, !cir.ptr<!rec_Iterator>{{.*}} ["__begin1"]
+// CIR: %[[END_ADDR:.*]] = cir.alloca !rec_Iterator, !cir.ptr<!rec_Iterator>{{.*}} ["__end1"]
+// CIR: %[[E_ADDR:.*]] = cir.alloca !cir.ptr<!rec_Element>{{.*}} ["e", init, const]
+// CIR: cir.store %[[C_ADDR]], %[[RANGE_ADDR]]
+// CIR: cir.for : cond {
+// CIR: %[[ITER_NE:.*]] = cir.call @_ZNK8IteratorneERKS_(%[[BEGIN_ADDR]], %[[END_ADDR]])
+// CIR: cir.condition(%[[ITER_NE]])
+// CIR: } body {
+// CIR: %[[E:.*]] = cir.call @_ZN8IteratordeEv(%[[BEGIN_ADDR]])
+// CIR: cir.store %[[E]], %[[E_ADDR]]
+// CIR: cir.yield
+// CIR: } step {
+// CIR: %[[ITER_NEXT:.*]] = cir.call @_ZN8IteratorppEv(%[[BEGIN_ADDR]])
+// CIR: cir.yield
+// CIR: }
+// CIR: }
``````````
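As a concrete illustration (assumed example, not from the patch or test suite) of the "l-value call expressions" and implicit no-op casts mentioned in the summary: a member call that returns a reference can appear in an l-value context, and binding it to a more-qualified reference is modeled as a `CK_NoOp` cast, which is the pattern `emitCallExprLValue` and the new `CK_NoOp` case in `emitCastLValue` cover.

```c++
// Minimal sketch with hypothetical types: b.get() is a call expression of
// reference type used as an l-value, and binding it to a const reference
// introduces an implicit no-op (qualification) cast.
struct Node { int value; };

struct Box {
  Node &get(); // declaration only; returns a reference
};

int use(Box b) {
  const Node &n = b.get(); // l-value call + CK_NoOp qualification cast
  return n.value;
}
```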
https://github.com/llvm/llvm-project/pull/140636