[flang-commits] [flang] [Flang] - Handle `BoxCharType` in `fir.box_offset` op (PR #141713)

Pranav Bhandarkar via flang-commits flang-commits at lists.llvm.org
Tue Jun 3 11:17:49 PDT 2025


https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/141713

>From 271272f7a98bf5bf5e651c70cbd5030a311cc078 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 23 May 2025 10:23:57 -0500
Subject: [PATCH 1/4] Add the ability to the fir.box_offset op to handle
 references to fir.boxchar

---
 .../include/flang/Optimizer/Dialect/FIROps.td |  6 +++++
 .../include/flang/Optimizer/Dialect/FIRType.h |  5 ++--
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 25 ++++++++++++++-----
 flang/lib/Optimizer/Dialect/FIROps.cpp        | 16 +++++++++---
 flang/lib/Optimizer/Dialect/FIRType.cpp       |  2 +-
 flang/test/Fir/box-offset-codegen.fir         | 10 ++++++++
 flang/test/Fir/box-offset.fir                 |  5 ++++
 flang/test/Fir/invalid.fir                    | 10 +++++++-
 8 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index dc66885f776f0..160de05a33b41 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -3240,11 +3240,17 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> {
     descriptor implementation must have, only the base_addr and derived_type
     descriptor fields can be addressed.
 
+    It also accepts the address of a fir.boxchar and returns the
+    address of the data pointer encapsulated by the fir.boxchar.
+
     ```
         %addr = fir.box_offset %box base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
         %tdesc = fir.box_offset %box derived_type : (!fir.ref<!fir.box<!fir.type<t>>>) -> !fir.llvm_ptr<!fir.tdesc<!fir.type<t>>>
 
+        %addr1 = fir.box_offset %boxchar base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
     ```
+
+    The derived_type field cannot be used when the input to this op is a reference to a fir.boxchar.
   }];
 
   let arguments = (ins
diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h
index 52b14f15f89bd..01878aa41005c 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRType.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRType.h
@@ -278,8 +278,9 @@ inline mlir::Type unwrapRefType(mlir::Type t) {
 /// If `t` conforms with a pass-by-reference type (box, ref, ptr, etc.) then
 /// return the element type of `t`. Otherwise, return `t`.
 inline mlir::Type unwrapPassByRefType(mlir::Type t) {
-  if (auto eleTy = dyn_cast_ptrOrBoxEleTy(t))
-    return eleTy;
+  if (conformsWithPassByRef(t))
+    if (auto eleTy = dyn_cast_ptrOrBoxEleTy(t))
+      return eleTy;
   return t;
 }
 
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 205807eab403a..e383c2e3e89ab 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3930,12 +3930,25 @@ struct BoxOffsetOpConversion : public fir::FIROpConversion<fir::BoxOffsetOp> {
                   mlir::ConversionPatternRewriter &rewriter) const override {
 
     mlir::Type pty = ::getLlvmPtrType(boxOffset.getContext());
-    mlir::Type boxType = fir::unwrapRefType(boxOffset.getBoxRef().getType());
-    mlir::Type llvmBoxTy =
-        lowerTy().convertBoxTypeAsStruct(mlir::cast<fir::BaseBoxType>(boxType));
-    int fieldId = boxOffset.getField() == fir::BoxFieldAttr::derived_type
-                      ? getTypeDescFieldId(boxType)
-                      : kAddrPosInBox;
+    mlir::Type boxRefType = fir::unwrapRefType(boxOffset.getBoxRef().getType());
+
+    assert((mlir::isa<fir::BaseBoxType>(boxRefType) ||
+            mlir::isa<fir::BoxCharType>(boxRefType)) &&
+           "boxRef should be a reference to either fir.box or fir.boxchar");
+
+    mlir::Type llvmBoxTy;
+    int fieldId;
+    if (auto boxType = mlir::dyn_cast_or_null<fir::BaseBoxType>(boxRefType)) {
+      llvmBoxTy =
+          lowerTy().convertBoxTypeAsStruct(mlir::cast<fir::BaseBoxType>(boxType));
+      fieldId = boxOffset.getField() == fir::BoxFieldAttr::derived_type
+                        ? getTypeDescFieldId(boxType)
+                        : kAddrPosInBox;
+    } else {
+      auto boxCharType = mlir::cast<fir::BoxCharType>(boxRefType);
+      llvmBoxTy = lowerTy().convertType(boxCharType);
+      fieldId = kAddrPosInBox;
+    }
     rewriter.replaceOpWithNewOp<mlir::LLVM::GEPOp>(
         boxOffset, pty, llvmBoxTy, adaptor.getBoxRef(),
         llvm::ArrayRef<mlir::LLVM::GEPArg>{0, fieldId});
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index cbe93907265f6..6435886d73081 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -4484,15 +4484,25 @@ void fir::IfOp::resultToSourceOps(llvm::SmallVectorImpl<mlir::Value> &results,
 llvm::LogicalResult fir::BoxOffsetOp::verify() {
   auto boxType = mlir::dyn_cast_or_null<fir::BaseBoxType>(
       fir::dyn_cast_ptrEleTy(getBoxRef().getType()));
-  if (!boxType)
-    return emitOpError("box_ref operand must have !fir.ref<!fir.box<T>> type");
+  mlir::Type boxCharType;
+  bool isBoxChar = false;
+  if (!boxType) {
+    boxCharType = mlir::dyn_cast_or_null<fir::BoxCharType>(
+        fir::dyn_cast_ptrEleTy(getBoxRef().getType()));
+    if (!boxCharType)
+      return emitOpError("box_ref operand must have !fir.ref<!fir.box<T>> or !fir.ref<!fir.boxchar<k>> type");
+    isBoxChar = true;
+  }
   if (getField() != fir::BoxFieldAttr::base_addr &&
       getField() != fir::BoxFieldAttr::derived_type)
     return emitOpError("cannot address provided field");
-  if (getField() == fir::BoxFieldAttr::derived_type)
+  if (getField() == fir::BoxFieldAttr::derived_type) {
+    if (isBoxChar)
+      return emitOpError("cannot address derived_type field of a fir.boxchar");
     if (!fir::boxHasAddendum(boxType))
       return emitOpError("can only address derived_type field of derived type "
                          "or unlimited polymorphic fir.box");
+  }
   return mlir::success();
 }
 
diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp
index 1e6e95393c2f7..da7aa17445404 100644
--- a/flang/lib/Optimizer/Dialect/FIRType.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRType.cpp
@@ -255,7 +255,7 @@ mlir::Type dyn_cast_ptrOrBoxEleTy(mlir::Type t) {
   return llvm::TypeSwitch<mlir::Type, mlir::Type>(t)
       .Case<fir::ReferenceType, fir::PointerType, fir::HeapType,
             fir::LLVMPointerType>([](auto p) { return p.getEleTy(); })
-      .Case<fir::BaseBoxType>(
+      .Case<fir::BaseBoxType, fir::BoxCharType>(
           [](auto p) { return unwrapRefType(p.getEleTy()); })
       .Default([](mlir::Type) { return mlir::Type{}; });
 }
diff --git a/flang/test/Fir/box-offset-codegen.fir b/flang/test/Fir/box-offset-codegen.fir
index 15c9a11e5aefe..59cfda8523061 100644
--- a/flang/test/Fir/box-offset-codegen.fir
+++ b/flang/test/Fir/box-offset-codegen.fir
@@ -37,3 +37,13 @@ func.func @array_tdesc(%array : !fir.ref<!fir.class<!fir.ptr<!fir.array<?x!fir.t
 // CHECK-SAME: ptr captures(none) %[[BOX:.*]]){{.*}}{
 // CHECK:    %[[VAL_0:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[BOX]], i32 0, i32 8
 // CHECK:    ret ptr %[[VAL_0]]
+
+func.func @boxchar_addr(%boxchar : !fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>> {
+  %addr = fir.box_offset %boxchar base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
+  return %addr : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
+}
+
+// CHECK-LABEL: define ptr @boxchar_addr(
+// CHECK-SAME: ptr captures(none) %[[BOXCHAR:.*]]){{.*}} {
+// CHECK: %[[VAL_0:.*]] = getelementptr { ptr, i64 }, ptr %[[BOXCHAR]], i32 0, i32 0
+// CHECK: ret ptr %[[VAL_0]]
diff --git a/flang/test/Fir/box-offset.fir b/flang/test/Fir/box-offset.fir
index 98c2eaefb8d6b..181ad51a5dbe1 100644
--- a/flang/test/Fir/box-offset.fir
+++ b/flang/test/Fir/box-offset.fir
@@ -21,6 +21,9 @@ func.func @test_box_offset(%unlimited : !fir.ref<!fir.class<none>>, %type_star :
 
   %addr6 = fir.box_offset %type_star base_addr : (!fir.ref<!fir.box<!fir.array<?xnone>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xnone>>>
   %tdesc6 = fir.box_offset %type_star derived_type : (!fir.ref<!fir.box<!fir.array<?xnone>>>) -> !fir.llvm_ptr<!fir.tdesc<none>>
+
+  %boxchar = fir.alloca !fir.boxchar<1>
+  %addr7 = fir.box_offset %boxchar base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
   return
 }
 // CHECK-LABEL:   func.func @test_box_offset(
@@ -40,3 +43,5 @@ func.func @test_box_offset(%unlimited : !fir.ref<!fir.class<none>>, %type_star :
 // CHECK:           %[[VAL_13:.*]] = fir.box_offset %[[VAL_0]] derived_type : (!fir.ref<!fir.class<none>>) -> !fir.llvm_ptr<!fir.tdesc<none>>
 // CHECK:           %[[VAL_14:.*]] = fir.box_offset %[[VAL_1]] base_addr : (!fir.ref<!fir.box<!fir.array<?xnone>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xnone>>>
 // CHECK:           %[[VAL_15:.*]] = fir.box_offset %[[VAL_1]] derived_type : (!fir.ref<!fir.box<!fir.array<?xnone>>>) -> !fir.llvm_ptr<!fir.tdesc<none>>
+// CHECK:           %[[VAL_16:.*]] = fir.alloca !fir.boxchar<1>
+// CHECK:           %[[VAL_17:.*]] = fir.box_offset %[[VAL_16]] base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir
index fd607fd9066f7..45cae1f82cb8e 100644
--- a/flang/test/Fir/invalid.fir
+++ b/flang/test/Fir/invalid.fir
@@ -972,13 +972,21 @@ func.func @rec_to_rec(%arg0: !fir.type<t1{i:i32, f:f32}>) -> !fir.type<t2{f:f32,
 // -----
 
 func.func @bad_box_offset(%not_a_box : !fir.ref<i32>) {
-  // expected-error@+1{{'fir.box_offset' op box_ref operand must have !fir.ref<!fir.box<T>> type}}
+  // expected-error@+1{{'fir.box_offset' op box_ref operand must have !fir.ref<!fir.box<T>> or !fir.ref<!fir.boxchar<k>> type}}
   %addr1 = fir.box_offset %not_a_box base_addr : (!fir.ref<i32>) -> !fir.llvm_ptr<!fir.ref<i32>>
   return
 }
 
 // -----
 
+func.func @bad_box_offset(%boxchar : !fir.ref<!fir.boxchar<1>>) {
+  // expected-error@+1{{'fir.box_offset' op cannot address derived_type field of a fir.boxchar}}
+  %addr1 = fir.box_offset %boxchar derived_type : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
+  return
+}
+
+// -----
+
 func.func @bad_box_offset(%no_addendum : !fir.ref<!fir.box<i32>>) {
   // expected-error@+1{{'fir.box_offset' op can only address derived_type field of derived type or unlimited polymorphic fir.box}}
   %addr1 = fir.box_offset %no_addendum derived_type : (!fir.ref<!fir.box<i32>>) -> !fir.llvm_ptr<!fir.tdesc<!fir.type<none>>>

>From ada55c9750f6c2c2309ed7356c64e4751290cc3e Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 28 May 2025 10:21:12 -0500
Subject: [PATCH 2/4] Fix clang-format issues

---
 flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 ++++----
 flang/lib/Optimizer/Dialect/FIROps.cpp  | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index e383c2e3e89ab..82d960a6fc61e 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3939,11 +3939,11 @@ struct BoxOffsetOpConversion : public fir::FIROpConversion<fir::BoxOffsetOp> {
     mlir::Type llvmBoxTy;
     int fieldId;
     if (auto boxType = mlir::dyn_cast_or_null<fir::BaseBoxType>(boxRefType)) {
-      llvmBoxTy =
-          lowerTy().convertBoxTypeAsStruct(mlir::cast<fir::BaseBoxType>(boxType));
+      llvmBoxTy = lowerTy().convertBoxTypeAsStruct(
+          mlir::cast<fir::BaseBoxType>(boxType));
       fieldId = boxOffset.getField() == fir::BoxFieldAttr::derived_type
-                        ? getTypeDescFieldId(boxType)
-                        : kAddrPosInBox;
+                    ? getTypeDescFieldId(boxType)
+                    : kAddrPosInBox;
     } else {
       auto boxCharType = mlir::cast<fir::BoxCharType>(boxRefType);
       llvmBoxTy = lowerTy().convertType(boxCharType);
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 6435886d73081..8d3c82d00eec5 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -4490,7 +4490,8 @@ llvm::LogicalResult fir::BoxOffsetOp::verify() {
     boxCharType = mlir::dyn_cast_or_null<fir::BoxCharType>(
         fir::dyn_cast_ptrEleTy(getBoxRef().getType()));
     if (!boxCharType)
-      return emitOpError("box_ref operand must have !fir.ref<!fir.box<T>> or !fir.ref<!fir.boxchar<k>> type");
+      return emitOpError("box_ref operand must have !fir.ref<!fir.box<T>> or "
+                         "!fir.ref<!fir.boxchar<k>> type");
     isBoxChar = true;
   }
   if (getField() != fir::BoxFieldAttr::base_addr &&

>From f3e8cbe2cf0eab5410d9cd78a9a4849b81162669 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 3 Jun 2025 10:54:55 -0500
Subject: [PATCH 3/4] Make changes based on review comments

---
 flang/include/flang/Optimizer/Dialect/FIRType.h | 5 ++---
 flang/lib/Optimizer/Dialect/FIROps.cpp          | 6 ++----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h
index 01878aa41005c..52b14f15f89bd 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRType.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRType.h
@@ -278,9 +278,8 @@ inline mlir::Type unwrapRefType(mlir::Type t) {
 /// If `t` conforms with a pass-by-reference type (box, ref, ptr, etc.) then
 /// return the element type of `t`. Otherwise, return `t`.
 inline mlir::Type unwrapPassByRefType(mlir::Type t) {
-  if (conformsWithPassByRef(t))
-    if (auto eleTy = dyn_cast_ptrOrBoxEleTy(t))
-      return eleTy;
+  if (auto eleTy = dyn_cast_ptrOrBoxEleTy(t))
+    return eleTy;
   return t;
 }
 
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 8d3c82d00eec5..6181e1fad4240 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -4485,21 +4485,19 @@ llvm::LogicalResult fir::BoxOffsetOp::verify() {
   auto boxType = mlir::dyn_cast_or_null<fir::BaseBoxType>(
       fir::dyn_cast_ptrEleTy(getBoxRef().getType()));
   mlir::Type boxCharType;
-  bool isBoxChar = false;
   if (!boxType) {
     boxCharType = mlir::dyn_cast_or_null<fir::BoxCharType>(
         fir::dyn_cast_ptrEleTy(getBoxRef().getType()));
     if (!boxCharType)
       return emitOpError("box_ref operand must have !fir.ref<!fir.box<T>> or "
                          "!fir.ref<!fir.boxchar<k>> type");
-    isBoxChar = true;
+    if (getField() == fir::BoxFieldAttr::derived_type)
+      return emitOpError("cannot address derived_type field of a fir.boxchar");
   }
   if (getField() != fir::BoxFieldAttr::base_addr &&
       getField() != fir::BoxFieldAttr::derived_type)
     return emitOpError("cannot address provided field");
   if (getField() == fir::BoxFieldAttr::derived_type) {
-    if (isBoxChar)
-      return emitOpError("cannot address derived_type field of a fir.boxchar");
     if (!fir::boxHasAddendum(boxType))
       return emitOpError("can only address derived_type field of derived type "
                          "or unlimited polymorphic fir.box");

>From 33005313317b7925a5664a1e9aee6595cd060c0d Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 3 Jun 2025 13:17:23 -0500
Subject: [PATCH 4/4] Fix testcase

---
 flang/test/Fir/box-offset-codegen.fir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/test/Fir/box-offset-codegen.fir b/flang/test/Fir/box-offset-codegen.fir
index 59cfda8523061..2b84d3d1f309d 100644
--- a/flang/test/Fir/box-offset-codegen.fir
+++ b/flang/test/Fir/box-offset-codegen.fir
@@ -44,6 +44,6 @@ func.func @boxchar_addr(%boxchar : !fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!
 }
 
 // CHECK-LABEL: define ptr @boxchar_addr(
-// CHECK-SAME: ptr captures(none) %[[BOXCHAR:.*]]){{.*}} {
+// CHECK-SAME: ptr {{.*}} %[[BOXCHAR:.*]]){{.*}} {
 // CHECK: %[[VAL_0:.*]] = getelementptr { ptr, i64 }, ptr %[[BOXCHAR]], i32 0, i32 0
 // CHECK: ret ptr %[[VAL_0]]



More information about the flang-commits mailing list