[clang] [CIR] Upstream support for accessing structure members (PR #136383)

Mon Apr 21 14:45:07 PDT 2025

https://github.com/andykaylor updated https://github.com/llvm/llvm-project/pull/136383

>From 17800eddb70a74cf3966d64ab7bc5ad5508f8bb0 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akaylor at nvidia.com>
Date: Fri, 18 Apr 2025 15:02:22 -0700
Subject: [PATCH 1/3] [CIR] Upstream support for accessing structure members

This adds ClangIR support for accessing structure members. Access to
union members is deferred to a later change.
---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      |   6 +
 clang/include/clang/CIR/Dialect/IR/CIROps.td  |  64 +++++++++
 .../include/clang/CIR/Dialect/IR/CIRTypes.td  |   3 +
 clang/include/clang/CIR/MissingFeatures.h     |   2 +
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp          | 134 ++++++++++++++++++
 clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp    |  15 ++
 clang/lib/CIR/CodeGen/CIRGenFunction.cpp      |   2 +
 clang/lib/CIR/CodeGen/CIRGenFunction.h        |   7 +
 clang/lib/CIR/CodeGen/CIRGenModule.h          |   7 +
 clang/lib/CIR/CodeGen/CIRGenRecordLayout.h    |   8 +-
 clang/lib/CIR/CodeGen/CIRGenTypes.cpp         |  21 +++
 clang/lib/CIR/CodeGen/CIRGenTypes.h           |   2 +
 clang/lib/CIR/CodeGen/CIRGenValue.h           |   7 +
 clang/lib/CIR/Dialect/IR/CIRDialect.cpp       |  18 +++
 clang/lib/CIR/Dialect/IR/CIRTypes.cpp         |  31 ++++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  24 ++++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h   |  10 ++
 clang/test/CIR/CodeGen/struct.c               | 134 +++++++++++++++---
 18 files changed, 471 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index d2a241964f34f..5eac4fedaec75 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -191,6 +191,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return create<cir::StoreOp>(loc, val, dst);
   }
 
+  cir::GetMemberOp createGetMember(mlir::Location loc, mlir::Type resultTy,
+                                   mlir::Value base, llvm::StringRef name,
+                                   unsigned index) {
+    return create<cir::GetMemberOp>(loc, resultTy, base, name, index);
+  }
+
   mlir::Value createDummyValue(mlir::Location loc, mlir::Type type,
                                clang::CharUnits alignment) {
     auto addr = createAlloca(loc, getPointerTo(type), type, {},
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 5ba4b33dc1a12..80b875b2c94ce 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -1310,6 +1310,70 @@ def GetGlobalOp : CIR_Op<"get_global",
   let hasVerifier = 0;
 }
 
+//===----------------------------------------------------------------------===//
+// GetMemberOp
+//===----------------------------------------------------------------------===//
+
+def GetMemberOp : CIR_Op<"get_member"> {
+  let summary = "Get the address of a member of a record";
+  let description = [{
+    The `cir.get_member` operation gets the address of a particular named
+    member from the input record.
+
+    It expects a pointer to the base record as well as the name of the member
+    and its field index.
+
+    Example:
+    ```mlir
+    // Suppose we have a record with multiple members.
+    !s32i = !cir.int<s, 32>
+    !s8i = !cir.int<s, 8>
+    !ty_B = !cir.record<struct "B" {!s32i, !s8i}>
+
+    // Get the address of the member at index 1.
+    %1 = cir.get_member %0[1] {name = "i"} : !cir.ptr<!ty_B> -> !cir.ptr<!s8i>
+    ```
+  }];
+
+  let arguments = (ins
+    Arg<CIR_PointerType, "the address to load from", [MemRead]>:$addr,
+    StrAttr:$name,
+    IndexAttr:$index_attr);
+
+  let results = (outs Res<CIR_PointerType, "">:$result);
+
+  let assemblyFormat = [{
+    $addr `[` $index_attr `]` attr-dict
+    `:` qualified(type($addr)) `->` qualified(type($result))
+  }];
+
+  let builders = [
+    OpBuilder<(ins "mlir::Type":$type,
+                   "mlir::Value":$value,
+                   "llvm::StringRef":$name,
+                   "unsigned":$index),
+    [{
+      mlir::APInt fieldIdx(64, index);
+      build($_builder, $_state, type, value, name, fieldIdx);
+    }]>
+  ];
+
+  let extraClassDeclaration = [{
+    /// Return the index of the record member being accessed.
+    uint64_t getIndex() { return getIndexAttr().getZExtValue(); }
+
+    /// Return the pointer type of the base address operand.
+    cir::PointerType getAddrTy() { return getAddr().getType(); }
+
+    /// Return the result type.
+    cir::PointerType getResultTy() {
+      return mlir::cast<cir::PointerType>(getResult().getType());
+    }
+  }];
+
+  let hasVerifier = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // FuncOp
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
index b028bc7db4e59..27ee5389723fa 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
@@ -503,6 +503,9 @@ def CIR_RecordType : CIR_Type<"Record", "record",
     void complete(llvm::ArrayRef<mlir::Type> members, bool packed,
                   bool isPadded);
 
+    uint64_t getElementOffset(const mlir::DataLayout &dataLayout,
+              unsigned idx) const;
+
   private:
     unsigned computeStructSize(const mlir::DataLayout &dataLayout) const;
     uint64_t computeStructAlignment(const mlir::DataLayout &dataLayout) const;
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 0105d1bdaf3fd..6bfc1199aea55 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -157,6 +157,8 @@ struct MissingFeatures {
   static bool emitCheckedInBoundsGEP() { return false; }
   static bool preservedAccessIndexRegion() { return false; }
   static bool bitfields() { return false; }
+  static bool typeChecks() { return false; }
+  static bool lambdaFieldToName() { return false; }
 
   // Missing types
   static bool dataMemberType() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index aca26526b79f2..0a518c0fd935d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -27,6 +27,38 @@ using namespace clang;
 using namespace clang::CIRGen;
 using namespace cir;
 
+/// Get the address of a field's storage in a record (zero-sized fields: NYI).
+/// The resulting address doesn't necessarily have the right type.
+Address CIRGenFunction::emitAddrOfFieldStorage(Address base,
+                                               const FieldDecl *field,
+                                               llvm::StringRef fieldName,
+                                               unsigned fieldIndex) {
+  if (field->isZeroSize(getContext())) {
+    cgm.errorNYI(field->getSourceRange(),
+                 "emitAddrOfFieldStorage: zero-sized field");
+    return Address::invalid();
+  }
+
+  mlir::Location loc = getLoc(field->getLocation());
+
+  mlir::Type fieldType = convertType(field->getType());
+  auto fieldPtr = cir::PointerType::get(builder.getContext(), fieldType);
+  // In most cases fieldName is the same as field->getName(). Lambdas are the
+  // exception: their fields do not currently carry a name, so the name can be
+  // passed down from the CaptureStmt.
+  cir::GetMemberOp memberAddr = builder.createGetMember(
+      loc, fieldPtr, base.getPointer(), fieldName, fieldIndex);
+
+  // Retrieve layout information, compute alignment and return the final
+  // address.
+  const RecordDecl *rec = field->getParent();
+  const CIRGenRecordLayout &layout = cgm.getTypes().getCIRGenRecordLayout(rec);
+  unsigned idx = layout.getCIRFieldNo(field);
+  CharUnits offset = CharUnits::fromQuantity(
+      layout.getCIRType().getElementOffset(cgm.getDataLayout().layout, idx));
+  return Address(memberAddr, base.getAlignment().alignmentAtOffset(offset));
+}
+
 /// Given an expression of pointer type, try to
 /// derive a more accurate bound on the alignment of the pointer.
 Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr,
@@ -264,6 +296,66 @@ mlir::Value CIRGenFunction::emitStoreThroughBitfieldLValue(RValue src,
   return {};
 }
 
+LValue CIRGenFunction::emitLValueForField(LValue base, const FieldDecl *field) {
+  LValueBaseInfo baseInfo = base.getBaseInfo();
+
+  if (field->isBitField()) {
+    cgm.errorNYI(field->getSourceRange(), "emitLValueForField: bitfield");
+    return LValue();
+  }
+
+  QualType fieldType = field->getType();
+  const RecordDecl *rec = field->getParent();
+  AlignmentSource baseAlignSource = baseInfo.getAlignmentSource();
+  LValueBaseInfo fieldBaseInfo(getFieldAlignmentSource(baseAlignSource));
+  assert(!cir::MissingFeatures::opTBAA());
+
+  Address addr = base.getAddress();
+  if (auto *classDef = dyn_cast<CXXRecordDecl>(rec)) {
+    cgm.errorNYI(field->getSourceRange(), "emitLValueForField: C++ class");
+    return LValue();
+  }
+
+  unsigned recordCVR = base.getVRQualifiers();
+  if (rec->isUnion()) {
+    cgm.errorNYI(field->getSourceRange(), "emitLValueForField: union");
+    return LValue();
+  }
+
+  assert(!cir::MissingFeatures::preservedAccessIndexRegion());
+  llvm::StringRef fieldName = field->getName();
+  const CIRGenRecordLayout &layout =
+      cgm.getTypes().getCIRGenRecordLayout(field->getParent());
+  unsigned fieldIndex = layout.getCIRFieldNo(field);
+
+  assert(!cir::MissingFeatures::lambdaFieldToName());
+
+  addr = emitAddrOfFieldStorage(addr, field, fieldName, fieldIndex);
+
+  // If this is a reference field, load the reference right now.
+  if (fieldType->isReferenceType()) {
+    cgm.errorNYI(field->getSourceRange(), "emitLValueForField: reference type");
+    return LValue();
+  }
+
+  if (field->hasAttr<AnnotateAttr>()) {
+    cgm.errorNYI(field->getSourceRange(), "emitLValueForField: AnnotateAttr");
+    return LValue();
+  }
+
+  LValue lv = makeAddrLValue(addr, fieldType, fieldBaseInfo);
+  lv.getQuals().addCVRQualifiers(recordCVR);
+
+  // __weak attribute on a field is ignored.
+  if (lv.getQuals().getObjCGCAttr() == Qualifiers::Weak) {
+    cgm.errorNYI(field->getSourceRange(),
+                 "emitLValueForField: __weak attribute");
+    return LValue();
+  }
+
+  return lv;
+}
+
 mlir::Value CIRGenFunction::emitToMemory(mlir::Value value, QualType ty) {
   // Bool has a different representation in memory than in registers,
   // but in ClangIR, it is simply represented as a cir.bool value.
@@ -608,6 +700,48 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
   return lv;
 }
 
+LValue CIRGenFunction::emitMemberExpr(const MemberExpr *e) {
+  if (auto *vd = dyn_cast<VarDecl>(e->getMemberDecl())) {
+    cgm.errorNYI(e->getSourceRange(), "emitMemberExpr: VarDecl");
+    return LValue();
+  }
+
+  Expr *baseExpr = e->getBase();
+  // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a scalar.
+  LValue baseLV;
+  if (e->isArrow()) {
+    LValueBaseInfo baseInfo;
+    assert(!cir::MissingFeatures::opTBAA());
+    Address addr = emitPointerWithAlignment(baseExpr, &baseInfo);
+    QualType ptrTy = baseExpr->getType()->getPointeeType();
+    assert(!cir::MissingFeatures::typeChecks());
+    baseLV = makeAddrLValue(addr, ptrTy, baseInfo);
+  } else {
+    assert(!cir::MissingFeatures::typeChecks());
+    baseLV = emitLValue(baseExpr);
+  }
+
+  const NamedDecl *nd = e->getMemberDecl();
+  if (auto *field = dyn_cast<FieldDecl>(nd)) {
+    LValue lv = emitLValueForField(baseLV, field);
+    assert(!cir::MissingFeatures::setObjCGCLValueClass());
+    if (getLangOpts().OpenMP) {
+      // If the member was explicitly marked as nontemporal, mark it as
+      // nontemporal. If the base lvalue is marked as nontemporal, mark access
+      // to children as nontemporal too.
+      cgm.errorNYI(e->getSourceRange(), "emitMemberExpr: OpenMP");
+    }
+    return lv;
+  }
+
+  if (const auto *fd = dyn_cast<FunctionDecl>(nd)) {
+    cgm.errorNYI(e->getSourceRange(), "emitMemberExpr: FunctionDecl");
+    return LValue();
+  }
+
+  llvm_unreachable("Unhandled member declaration!");
+}
+
 LValue CIRGenFunction::emitBinaryOperatorLValue(const BinaryOperator *e) {
   // Comma expressions just emit their LHS then their RHS as an l-value.
   if (e->getOpcode() == BO_Comma) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 1bef1b976a4b5..05b337b52cbe9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -171,6 +171,8 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
     return emitLoadOfLValue(e);
   }
 
+  mlir::Value VisitMemberExpr(MemberExpr *e);
+
   mlir::Value VisitExplicitCastExpr(ExplicitCastExpr *e) {
     return VisitCastExpr(e);
   }
@@ -1529,6 +1531,19 @@ mlir::Value ScalarExprEmitter::VisitCallExpr(const CallExpr *e) {
   return v;
 }
 
+mlir::Value ScalarExprEmitter::VisitMemberExpr(MemberExpr *e) {
+  // TODO(cir): The classic codegen calls tryEmitAsConstant() here. Folding
+  // constants sounds like work for MLIR optimizers, but we'll keep an assertion
+  // for now.
+  assert(!cir::MissingFeatures::tryEmitAsConstant());
+  Expr::EvalResult result;
+  if (e->EvaluateAsInt(result, cgf.getContext(), Expr::SE_AllowSideEffects)) {
+    cgf.cgm.errorNYI(e->getSourceRange(), "Constant interger member expr");
+    // Fall through to emit this as a non-constant access.
+  }
+  return emitLoadOfLValue(e);
+}
+
 mlir::Value CIRGenFunction::emitScalarConversion(mlir::Value src,
                                                  QualType srcTy, QualType dstTy,
                                                  SourceLocation loc) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 76e9ca4fd61a8..5412f9f602711 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -513,6 +513,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
     return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
   case Expr::UnaryOperatorClass:
     return emitUnaryOpLValue(cast<UnaryOperator>(e));
+  case Expr::MemberExprClass:
+    return emitMemberExpr(cast<MemberExpr>(e));
   case Expr::BinaryOperatorClass:
     return emitBinaryOperatorLValue(cast<BinaryOperator>(e));
   case Expr::ParenExprClass:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 01abd84ce1c85..f533d0ab53cd2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -423,6 +423,10 @@ class CIRGenFunction : public CIRGenTypeCache {
                               clang::CharUnits alignment);
 
 public:
+  Address emitAddrOfFieldStorage(Address base, const FieldDecl *field,
+                                 llvm::StringRef fieldName,
+                                 unsigned fieldIndex);
+
   mlir::Value emitAlloca(llvm::StringRef name, mlir::Type ty,
                          mlir::Location loc, clang::CharUnits alignment,
                          bool insertIntoFnEntryBlock,
@@ -551,6 +555,9 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// of the expression.
   /// FIXME: document this function better.
   LValue emitLValue(const clang::Expr *e);
+  LValue emitLValueForField(LValue base, const clang::FieldDecl *field);
+
+  LValue emitMemberExpr(const MemberExpr *e);
 
   /// Given an expression with a pointer type, emit the value and compute our
   /// best estimate of the alignment of the pointee.
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 1e0d6623c4f40..1fb97334d7bd2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -19,6 +19,7 @@
 #include "CIRGenValue.h"
 
 #include "clang/AST/CharUnits.h"
+#include "clang/CIR/Dialect/IR/CIRDataLayout.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 
 #include "TargetInfo.h"
@@ -95,6 +96,12 @@ class CIRGenModule : public CIRGenTypeCache {
   const clang::LangOptions &getLangOpts() const { return langOpts; }
   mlir::MLIRContext &getMLIRContext() { return *builder.getContext(); }
 
+  const cir::CIRDataLayout getDataLayout() const {
+    // FIXME(cir): instead of creating a CIRDataLayout every time, set it as an
+    // attribute for the CIRModule class.
+    return cir::CIRDataLayout(theModule);
+  }
+
   /// -------
   /// Handling globals
   /// -------
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index a51e0460d1074..39a9d16ffd766 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -42,10 +42,10 @@ class CIRGenRecordLayout {
   cir::RecordType getCIRType() const { return completeObjectType; }
 
   /// Return cir::RecordType element number that corresponds to the field FD.
-  unsigned getCIRFieldNo(const clang::FieldDecl *FD) const {
-    FD = FD->getCanonicalDecl();
-    assert(fieldInfo.count(FD) && "Invalid field for record!");
-    return fieldInfo.lookup(FD);
+  unsigned getCIRFieldNo(const clang::FieldDecl *fd) const {
+    fd = fd->getCanonicalDecl();
+    assert(fieldInfo.count(fd) && "Invalid field for record!");
+    return fieldInfo.lookup(fd);
   }
 };
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index 7bd86cf0c7bcd..90993c71be9a6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -446,6 +446,27 @@ mlir::Type CIRGenTypes::convertTypeForMem(clang::QualType qualType,
   return convertedType;
 }
 
+/// Return record layout info for the given record decl.
+const CIRGenRecordLayout &
+CIRGenTypes::getCIRGenRecordLayout(const RecordDecl *rd) {
+  const auto *key = astContext.getTagDeclType(rd).getTypePtr();
+
+  // If we have already computed the layout, return it.
+  auto it = cirGenRecordLayouts.find(key);
+  if (it != cirGenRecordLayouts.end())
+    return *it->second;
+
+  // Compute the type information.
+  convertRecordDeclType(rd);
+
+  // Now try again.
+  it = cirGenRecordLayouts.find(key);
+
+  assert(it != cirGenRecordLayouts.end() &&
+         "Unable to find record layout information for type");
+  return *it->second;
+}
+
 bool CIRGenTypes::isZeroInitializable(clang::QualType t) {
   if (t->getAs<PointerType>())
     return astContext.getTargetNullPointerValue(t) == 0;
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h
index 2bb78420700f8..5b4027601ca3a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h
@@ -108,6 +108,8 @@ class CIRGenTypes {
   std::string getRecordTypeName(const clang::RecordDecl *,
                                 llvm::StringRef suffix);
 
+  const CIRGenRecordLayout &getCIRGenRecordLayout(const clang::RecordDecl *rd);
+
   /// Convert type T into an mlir::Type. This differs from convertType in that
   /// it is used to convert to the memory representation for a type. For
   /// example, the scalar representation for bool is i1, but the memory
diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h
index d4d6f5a44622e..1c453dc9c86b5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenValue.h
+++ b/clang/lib/CIR/CodeGen/CIRGenValue.h
@@ -140,6 +140,10 @@ class LValue {
   // TODO: Add support for volatile
   bool isVolatile() const { return false; }
 
+  unsigned getVRQualifiers() const {
+    return quals.getCVRQualifiers() & ~clang::Qualifiers::Const;
+  }
+
   clang::QualType getType() const { return type; }
 
   mlir::Value getPointer() const { return v; }
@@ -154,6 +158,9 @@ class LValue {
   }
 
   const clang::Qualifiers &getQuals() const { return quals; }
+  clang::Qualifiers &getQuals() { return quals; }
+
+  LValueBaseInfo getBaseInfo() const { return baseInfo; }
 
   static LValue makeAddr(Address address, clang::QualType t,
                          LValueBaseInfo baseInfo) {
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index d2313e75870b4..fb85052b465f1 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1096,6 +1096,24 @@ OpFoldResult cir::UnaryOp::fold(FoldAdaptor adaptor) {
   return {};
 }
 
+//===----------------------------------------------------------------------===//
+// GetMemberOp Definitions
+//===----------------------------------------------------------------------===//
+
+LogicalResult cir::GetMemberOp::verify() {
+  const auto recordTy = dyn_cast<RecordType>(getAddrTy().getPointee());
+  if (!recordTy)
+    return emitError() << "expected pointer to a record type";
+
+  if (recordTy.getMembers().size() <= getIndex())
+    return emitError() << "member index out of bounds";
+
+  if (recordTy.getMembers()[getIndex()] != getResultTy().getPointee())
+    return emitError() << "member type mismatch";
+
+  return mlir::success();
+}
+
 //===----------------------------------------------------------------------===//
 // TableGen'd op method definitions
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
index 8b5646f339bb3..c7ac02b7984a3 100644
--- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -309,6 +309,37 @@ RecordType::computeStructAlignment(const mlir::DataLayout &dataLayout) const {
   return recordAlignment;
 }
 
+uint64_t RecordType::getElementOffset(const ::mlir::DataLayout &dataLayout,
+                                      unsigned idx) const {
+  assert(idx < getMembers().size() && "access not valid");
+
+  // All union elements are at offset zero.
+  if (isUnion() || idx == 0)
+    return 0;
+
+  assert(isComplete() && "Cannot get layout of incomplete records");
+  assert(idx < getNumElements());
+  llvm::ArrayRef<mlir::Type> members = getMembers();
+
+  unsigned offset = 0;
+
+  for (unsigned i = 0, e = idx; i != e; ++i) {
+    mlir::Type ty = members[i];
+
+    // This matches LLVM since it uses the ABI instead of preferred alignment.
+    const llvm::Align tyAlign =
+        llvm::Align(getPacked() ? 1 : dataLayout.getTypeABIAlignment(ty));
+
+    // Add padding if necessary to align the data element properly.
+    offset = llvm::alignTo(offset, tyAlign);
+
+    // Consume space for this data item
+    offset += dataLayout.getTypeSize(ty);
+  }
+
+  return offset;
+}
+
 //===----------------------------------------------------------------------===//
 // IntType Definitions
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 8c4a67258df3f..389d1f457fc3b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1467,6 +1467,7 @@ void ConvertCIRToLLVMPass::runOnOperation() {
                CIRToLLVMConstantOpLowering,
                CIRToLLVMFuncOpLowering,
                CIRToLLVMGetGlobalOpLowering,
+               CIRToLLVMGetMemberOpLowering,
                CIRToLLVMTrapOpLowering,
                CIRToLLVMUnaryOpLowering
       // clang-format on
@@ -1496,6 +1497,29 @@ mlir::LogicalResult CIRToLLVMBrOpLowering::matchAndRewrite(
   return mlir::LogicalResult::success();
 }
 
+mlir::LogicalResult CIRToLLVMGetMemberOpLowering::matchAndRewrite(
+    cir::GetMemberOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type llResTy = getTypeConverter()->convertType(op.getType());
+  const auto recordTy =
+      mlir::cast<cir::RecordType>(op.getAddrTy().getPointee());
+  assert(recordTy && "expected record type");
+
+  switch (recordTy.getKind()) {
+  case cir::RecordType::Struct: {
+    // Since the base address is a pointer to an aggregate, the first offset
+    // is always zero. The second offset tells us which member it will access.
+    llvm::SmallVector<mlir::LLVM::GEPArg, 2> offset{0, op.getIndex()};
+    const mlir::Type elementTy = getTypeConverter()->convertType(recordTy);
+    rewriter.replaceOpWithNewOp<mlir::LLVM::GEPOp>(op, llResTy, elementTy,
+                                                   adaptor.getAddr(), offset);
+    return mlir::success();
+  }
+  case cir::RecordType::Union:
+    return op.emitError() << "NYI: union get_member lowering";
+  }
+}
+
 mlir::LogicalResult CIRToLLVMTrapOpLowering::matchAndRewrite(
     cir::TrapOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 1de6c9c56b485..90f28d256527f 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -218,6 +218,16 @@ class CIRToLLVMBrOpLowering : public mlir::OpConversionPattern<cir::BrOp> {
                   mlir::ConversionPatternRewriter &) const override;
 };
 
+class CIRToLLVMGetMemberOpLowering
+    : public mlir::OpConversionPattern<cir::GetMemberOp> {
+public:
+  using mlir::OpConversionPattern<cir::GetMemberOp>::OpConversionPattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::GetMemberOp op, OpAdaptor,
+                  mlir::ConversionPatternRewriter &) const override;
+};
+
 class CIRToLLVMTrapOpLowering : public mlir::OpConversionPattern<cir::TrapOp> {
 public:
   using mlir::OpConversionPattern<cir::TrapOp>::OpConversionPattern;
diff --git a/clang/test/CIR/CodeGen/struct.c b/clang/test/CIR/CodeGen/struct.c
index 3dc1655e15d2c..b78a2367bda3a 100644
--- a/clang/test/CIR/CodeGen/struct.c
+++ b/clang/test/CIR/CodeGen/struct.c
@@ -7,23 +7,23 @@
 
 // For LLVM IR checks, the structs are defined before the variables, so these
 // checks are at the top.
-// LLVM: %struct.CompleteS = type { i32, i8 }
-// LLVM: %struct.OuterS = type { %struct.InnerS, i32 }
-// LLVM: %struct.InnerS = type { i32, i8 }
-// LLVM: %struct.PackedS = type <{ i32, i8 }>
-// LLVM: %struct.PackedAndPaddedS = type <{ i32, i8, i8 }>
-// OGCG: %struct.CompleteS = type { i32, i8 }
-// OGCG: %struct.OuterS = type { %struct.InnerS, i32 }
-// OGCG: %struct.InnerS = type { i32, i8 }
-// OGCG: %struct.PackedS = type <{ i32, i8 }>
-// OGCG: %struct.PackedAndPaddedS = type <{ i32, i8, i8 }>
+// LLVM-DAG: %struct.CompleteS = type { i32, i8 }
+// LLVM-DAG: %struct.OuterS = type { %struct.InnerS, i32 }
+// LLVM-DAG: %struct.InnerS = type { i32, i8 }
+// LLVM-DAG: %struct.PackedS = type <{ i32, i8 }>
+// LLVM-DAG: %struct.PackedAndPaddedS = type <{ i32, i8, i8 }>
+// OGCG-DAG: %struct.CompleteS = type { i32, i8 }
+// OGCG-DAG: %struct.OuterS = type { %struct.InnerS, i32 }
+// OGCG-DAG: %struct.InnerS = type { i32, i8 }
+// OGCG-DAG: %struct.PackedS = type <{ i32, i8 }>
+// OGCG-DAG: %struct.PackedAndPaddedS = type <{ i32, i8, i8 }>
 
 struct IncompleteS *p;
 
 // CIR:      cir.global external @p = #cir.ptr<null> : !cir.ptr<!cir.record<struct
 // CIR-SAME:     "IncompleteS" incomplete>>
-// LLVM: @p = dso_local global ptr null
-// OGCG: @p = global ptr null, align 8
+// LLVM-DAG: @p = dso_local global ptr null
+// OGCG-DAG: @p = global ptr null, align 8
 
 struct CompleteS {
   int a;
@@ -32,8 +32,8 @@ struct CompleteS {
 
 // CIR:       cir.global external @cs = #cir.zero : !cir.record<struct
 // CIR-SAME:      "CompleteS" {!s32i, !s8i}>
-// LLVM:      @cs = dso_local global %struct.CompleteS zeroinitializer
-// OGCG:      @cs = global %struct.CompleteS zeroinitializer, align 4
+// LLVM-DAG:      @cs = dso_local global %struct.CompleteS zeroinitializer
+// OGCG-DAG:      @cs = global %struct.CompleteS zeroinitializer, align 4
 
 struct InnerS {
   int a;
@@ -49,8 +49,8 @@ struct OuterS os;
 
 // CIR:       cir.global external @os = #cir.zero : !cir.record<struct
 // CIR-SAME:      "OuterS" {!cir.record<struct "InnerS" {!s32i, !s8i}>, !s32i}>
-// LLVM:      @os = dso_local global %struct.OuterS zeroinitializer
-// OGCG:      @os = global %struct.OuterS zeroinitializer, align 4
+// LLVM-DAG:      @os = dso_local global %struct.OuterS zeroinitializer
+// OGCG-DAG:      @os = global %struct.OuterS zeroinitializer, align 4
 
 #pragma pack(push)
 #pragma pack(1)
@@ -62,8 +62,8 @@ struct PackedS {
 
 // CIR:       cir.global external @ps = #cir.zero : !cir.record<struct "PackedS"
 // CIR-SAME:      packed {!s32i, !s8i}>
-// LLVM:      @ps = dso_local global %struct.PackedS zeroinitializer
-// OGCG:      @ps = global %struct.PackedS zeroinitializer, align 1
+// LLVM-DAG:      @ps = dso_local global %struct.PackedS zeroinitializer
+// OGCG-DAG:      @ps = global %struct.PackedS zeroinitializer, align 1
 
 struct PackedAndPaddedS {
   int  b0;
@@ -72,8 +72,8 @@ struct PackedAndPaddedS {
 
 // CIR:       cir.global external @pps = #cir.zero : !cir.record<struct
 // CIR-SAME:      "PackedAndPaddedS" packed padded {!s32i, !s8i, !u8i}>
-// LLVM:      @pps = dso_local global %struct.PackedAndPaddedS zeroinitializer
-// OGCG:      @pps = global %struct.PackedAndPaddedS zeroinitializer, align 2
+// LLVM-DAG:      @pps = dso_local global %struct.PackedAndPaddedS zeroinitializer
+// OGCG-DAG:      @pps = global %struct.PackedAndPaddedS zeroinitializer, align 2
 
 #pragma pack(pop)
 
@@ -114,3 +114,97 @@ void f2(void) {
 // OGCG-NEXT: entry:
 // OGCG-NEXT:   %[[S:.*]] = alloca %struct.CompleteS, align 4
 // OGCG-NEXT:   ret void
+
+char f3(int a) {
+  cs.a = a;
+  return cs.b;
+}
+
+// CIR:      cir.func @f3(%[[ARG_A:.*]]: !s32i
+// CIR-NEXT:   %[[A_ADDR:.*]] = cir.alloca {{.*}} ["a", init] {alignment = 4 : i64}
+// CIR-NEXT:   %[[RETVAL_ADDR:.*]] = cir.alloca {{.*}} ["__retval"] {alignment = 1 : i64}
+// CIR-NEXT:   cir.store %[[ARG_A]], %[[A_ADDR]]
+// CIR-NEXT:   %[[A_VAL:.*]] = cir.load %[[A_ADDR]]
+// CIR-NEXT:   %[[CS:.*]] = cir.get_global @cs
+// CIR-NEXT:   %[[CS_A:.*]] = cir.get_member %[[CS]][0] {name = "a"}
+// CIR-NEXT:   cir.store %[[A_VAL]], %[[CS_A]]
+// CIR-NEXT:   %[[CS2:.*]] = cir.get_global @cs
+// CIR-NEXT:   %[[CS_B:.*]] = cir.get_member %[[CS2]][1] {name = "b"}
+// CIR-NEXT:   %[[CS_B_VAL:.*]] = cir.load %[[CS_B]]
+// CIR-NEXT:   cir.store %[[CS_B_VAL]], %[[RETVAL_ADDR]]
+// CIR-NEXT:   %[[RETVAL:.*]] = cir.load %[[RETVAL_ADDR]]
+// CIR-NEXT:   cir.return %[[RETVAL]]
+
+// LLVM:      define i8 @f3(i32 %[[ARG_A:.*]])
+// LLVM-NEXT:   %[[A_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM-NEXT:   %[[RETVAL_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM-NEXT:   store i32 %[[ARG_A]], ptr %[[A_ADDR]], align 4
+// LLVM-NEXT:   %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// LLVM-NEXT:   store i32 %[[A_VAL]], ptr @cs, align 4
+// LLVM-NEXT:   %[[CS_B_VAL:.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @cs, i64 4), align 1
+// LLVM-NEXT:   store i8 %[[CS_B_VAL]], ptr %[[RETVAL_ADDR]], align 1
+// LLVM-NEXT:   %[[RETVAL:.*]] = load i8, ptr %[[RETVAL_ADDR]], align 1
+// LLVM-NEXT:   ret i8 %[[RETVAL]]
+
+// OGCG:      define{{.*}} i8 @f3(i32{{.*}} %[[ARG_A:.*]])
+// OGCG-NEXT: entry:
+// OGCG-NEXT:   %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG-NEXT:   store i32 %[[ARG_A]], ptr %[[A_ADDR]], align 4
+// OGCG-NEXT:   %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG-NEXT:   store i32 %[[A_VAL]], ptr @cs, align 4
+// OGCG-NEXT:   %[[CS_B_VAL:.*]] = load i8, ptr getelementptr inbounds nuw (%struct.CompleteS, ptr @cs, i32 0, i32 1), align 4
+// OGCG-NEXT:   ret i8 %[[CS_B_VAL]]
+
+char f4(int a, struct CompleteS *p) {
+  p->a = a;
+  return p->b;
+}
+
+// CIR:      cir.func @f4(%[[ARG_A:.*]]: !s32i {{.*}}, %[[ARG_P:.*]]: !cir.ptr<!cir.record<struct "CompleteS" {!s32i, !s8i}>>
+// CIR-NEXT:   %[[A_ADDR:.*]] = cir.alloca {{.*}} ["a", init] {alignment = 4 : i64}
+// CIR-NEXT:   %[[P_ADDR:.*]] = cir.alloca {{.*}} ["p", init] {alignment = 8 : i64}
+// CIR-NEXT:   %[[RETVAL_ADDR:.*]] = cir.alloca {{.*}} ["__retval"] {alignment = 1 : i64}
+// CIR-NEXT:   cir.store %[[ARG_A]], %[[A_ADDR]]
+// CIR-NEXT:   cir.store %[[ARG_P]], %[[P_ADDR]]
+// CIR-NEXT:   %[[A_VAL:.*]] = cir.load %[[A_ADDR]]
+// CIR-NEXT:   %[[P:.*]] = cir.load %[[P_ADDR]]
+// CIR-NEXT:   %[[P_A:.*]] = cir.get_member %[[P]][0] {name = "a"}
+// CIR-NEXT:   cir.store %[[A_VAL]], %[[P_A]]
+// CIR-NEXT:   %[[P2:.*]] = cir.load %[[P_ADDR]]
+// CIR-NEXT:   %[[P_B:.*]] = cir.get_member %[[P2]][1] {name = "b"}
+// CIR-NEXT:   %[[P_B_VAL:.*]] = cir.load %[[P_B]]
+// CIR-NEXT:   cir.store %[[P_B_VAL]], %[[RETVAL_ADDR]]
+// CIR-NEXT:   %[[RETVAL:.*]] = cir.load %[[RETVAL_ADDR]]
+// CIR-NEXT:   cir.return %[[RETVAL]]
+
+// LLVM:      define i8 @f4(i32 %[[ARG_A:.*]], ptr %[[ARG_P:.*]])
+// LLVM-NEXT:   %[[A_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM-NEXT:   %[[P_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM-NEXT:   %[[RETVAL_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM-NEXT:   store i32 %[[ARG_A]], ptr %[[A_ADDR]], align 4
+// LLVM-NEXT:   store ptr %[[ARG_P]], ptr %[[P_ADDR]], align 8
+// LLVM-NEXT:   %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// LLVM-NEXT:   %[[P_VAL:.*]] = load ptr, ptr %[[P_ADDR]], align 8
+// LLVM-NEXT:   %[[P_A:.*]] = getelementptr %struct.CompleteS, ptr %[[P_VAL]], i32 0, i32 0
+// LLVM-NEXT:   store i32 %[[A_VAL]], ptr %[[P_A]], align 4
+// LLVM-NEXT:   %[[P_VAL2:.*]] = load ptr, ptr %[[P_ADDR]], align 8
+// LLVM-NEXT:   %[[P_B:.*]] = getelementptr %struct.CompleteS, ptr %[[P_VAL2]], i32 0, i32 1
+// LLVM-NEXT:   %[[P_B_VAL:.*]] = load i8, ptr %[[P_B]], align 1
+// LLVM-NEXT:   store i8 %[[P_B_VAL]], ptr %[[RETVAL_ADDR]], align 1
+// LLVM-NEXT:   %[[RETVAL:.*]] = load i8, ptr %[[RETVAL_ADDR]], align 1
+// LLVM-NEXT:   ret i8 %[[RETVAL]]
+
+// OGCG:      define{{.*}} i8 @f4(i32{{.*}} %[[ARG_A:.*]], ptr{{.*}} %[[ARG_P:.*]])
+// OGCG-NEXT: entry:
+// OGCG-NEXT:   %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG-NEXT:   %[[P_ADDR:.*]] = alloca ptr, align 8
+// OGCG-NEXT:   store i32 %[[ARG_A]], ptr %[[A_ADDR]], align 4
+// OGCG-NEXT:   store ptr %[[ARG_P]], ptr %[[P_ADDR]], align 8
+// OGCG-NEXT:   %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG-NEXT:   %[[P:.*]] = load ptr, ptr %[[P_ADDR]], align 8
+// OGCG-NEXT:   %[[P_A:.*]] = getelementptr inbounds nuw %struct.CompleteS, ptr %[[P]], i32 0, i32 0
+// OGCG-NEXT:   store i32 %[[A_VAL]], ptr %[[P_A]], align 4
+// OGCG-NEXT:   %[[P2:.*]] = load ptr, ptr %[[P_ADDR]], align 8
+// OGCG-NEXT:   %[[P_B:.*]] = getelementptr inbounds nuw %struct.CompleteS, ptr %[[P2]], i32 0, i32 1
+// OGCG-NEXT:   %[[P_B_VAL:.*]] = load i8, ptr %[[P_B]], align 4
+// OGCG-NEXT:   ret i8 %[[P_B_VAL]]

>From 497182276d0048ad38d0244b4f42e62ca7ee49ee Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akaylor at nvidia.com>
Date: Mon, 21 Apr 2025 14:36:37 -0700
Subject: [PATCH 2/3] Address review feedback

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td | 2 +-
 clang/lib/CIR/Dialect/IR/CIRTypes.cpp        | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 80b875b2c94ce..e554b44ec21d1 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -1327,7 +1327,7 @@ def GetMemberOp : CIR_Op<"get_member"> {
     ```mlir
     // Suppose we have a record with multiple members.
     !s32i = !cir.int<s, 32>
-    !s8i = !cir.int<s, 32>
+    !s8i = !cir.int<s, 8>
     !ty_B = !cir.record<"struct.B" {!s32i, !s8i}>
 
     // Get the address of the member at index 1.
diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
index c7ac02b7984a3..75afc57f34aa5 100644
--- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -323,10 +323,8 @@ uint64_t RecordType::getElementOffset(const ::mlir::DataLayout &dataLayout,
 
   unsigned offset = 0;
 
-  for (unsigned i = 0, e = idx; i != e; ++i) {
-    mlir::Type ty = members[i];
-
-    // This matches LLVM since it uses the ABI instead of preferred alignment.
+  for (mlir::Type ty : llvm::make_range(members.begin(), std::next(members.begin(), idx))) {
+     // This matches LLVM since it uses the ABI instead of preferred alignment.
     const llvm::Align tyAlign =
         llvm::Align(getPacked() ? 1 : dataLayout.getTypeABIAlignment(ty));
 

>From 6885921f8ef3ed165da7d8b3230b1bdf294d3f70 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akaylor at nvidia.com>
Date: Mon, 21 Apr 2025 14:44:48 -0700
Subject: [PATCH 3/3] Fix formatting

---
 clang/lib/CIR/Dialect/IR/CIRTypes.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
index 75afc57f34aa5..c6133b9a20e4f 100644
--- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -323,8 +323,9 @@ uint64_t RecordType::getElementOffset(const ::mlir::DataLayout &dataLayout,
 
   unsigned offset = 0;
 
-  for (mlir::Type ty : llvm::make_range(members.begin(), std::next(members.begin(), idx))) {
-     // This matches LLVM since it uses the ABI instead of preferred alignment.
+  for (mlir::Type ty :
+       llvm::make_range(members.begin(), std::next(members.begin(), idx))) {
+    // This matches LLVM since it uses the ABI instead of preferred alignment.
     const llvm::Align tyAlign =
         llvm::Align(getPacked() ? 1 : dataLayout.getTypeABIAlignment(ty));