[flang-commits] [flang] b094c73 - [flang] Avoid opaque pointer issue with character array substring addressing

Jean Perier via flang-commits flang-commits at lists.llvm.org
Tue Jul 5 00:14:39 PDT 2022


Author: Jean Perier
Date: 2022-07-05T09:13:54+02:00
New Revision: b094c737cd85b8bf979c3cc998853b03a20d5c59

URL: https://github.com/llvm/llvm-project/commit/b094c737cd85b8bf979c3cc998853b03a20d5c59
DIFF: https://github.com/llvm/llvm-project/commit/b094c737cd85b8bf979c3cc998853b03a20d5c59.diff

LOG: [flang] Avoid opaque pointer issue with character array substring addressing

When addressing a substring of a character array, codegen emits two
GEPs: one for to compute the address of the base element, and a second
one to address the first characters from that element.

The first GEP still returns the LLVM array type (if the FIR array type could be
translated to an array type. Therefore) so zero
indexes must be added to the second GEP in this case to cover for the
Fortran array dimensions before inserting the susbtring offset index.

Surprisingly, the previous code worked ok when MLIR emits none opaque
pointers. But with opaque pointers, the two GEPs are folded in an
invalid GEP where the substring offset becomes an offset for the outer
array dimension.

Note that I tried to fix the issue by modifying the first GEP to return the
element type, but this still gave bad results (here something might be
wrong with opaque pointer in MLIR or LLVM).

Differential Revision: https://reviews.llvm.org/D129079

Added: 
    

Modified: 
    flang/lib/Optimizer/CodeGen/CodeGen.cpp
    flang/test/Fir/embox.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 2b9646aea0865..e53d096806612 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -1064,6 +1064,15 @@ getFree(fir::FreeMemOp op, mlir::ConversionPatternRewriter &rewriter) {
                                         /*isVarArg=*/false));
 }
 
+static unsigned getDimension(mlir::LLVM::LLVMArrayType ty) {
+  unsigned result = 1;
+  for (auto eleTy = ty.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>();
+       eleTy;
+       eleTy = eleTy.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>())
+    ++result;
+  return result;
+}
+
 namespace {
 /// Lower a `fir.freemem` instruction into `llvm.call @free`
 struct FreeMemOpConversion : public FIROpConversion<fir::FreeMemOp> {
@@ -1383,9 +1392,15 @@ struct EmboxCommonConversion : public FIROpConversion<OP> {
     llvm::SmallVector<mlir::Value> gepOperands;
     auto baseType =
         base.getType().cast<mlir::LLVM::LLVMPointerType>().getElementType();
-    if (baseType.isa<mlir::LLVM::LLVMArrayType>()) {
+    if (auto arrayType = baseType.dyn_cast<mlir::LLVM::LLVMArrayType>()) {
+      // FIXME: The baseType should be the array element type here, meaning
+      // there should at most be one dimension (constant length characters are
+      // lowered to LLVM as an array of length one characters.). However, using
+      // the character type in the GEP does not lead to correct GEPs when llvm
+      // opaque pointers are enabled.
       auto idxTy = this->lowerTy().indexType();
-      gepOperands.push_back(genConstantIndex(loc, idxTy, rewriter, 0));
+      gepOperands.append(getDimension(arrayType),
+                         genConstantIndex(loc, idxTy, rewriter, 0));
       gepOperands.push_back(lowerBound);
     } else {
       gepOperands.push_back(lowerBound);
@@ -1927,15 +1942,6 @@ struct ValueOpCommon {
   }
 
 private:
-  static unsigned getDimension(mlir::LLVM::LLVMArrayType ty) {
-    unsigned result = 1;
-    for (auto eleTy = ty.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>();
-         eleTy;
-         eleTy = eleTy.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>())
-      ++result;
-    return result;
-  }
-
   static mlir::Type getArrayElementType(mlir::LLVM::LLVMArrayType ty) {
     auto eleTy = ty.getElementType();
     while (auto arrTy = eleTy.dyn_cast<mlir::LLVM::LLVMArrayType>())

diff  --git a/flang/test/Fir/embox.fir b/flang/test/Fir/embox.fir
index 12b956dff6238..0019217bb68d9 100644
--- a/flang/test/Fir/embox.fir
+++ b/flang/test/Fir/embox.fir
@@ -75,7 +75,7 @@ func.func @emboxSubstring(%arg0: !fir.ref<!fir.array<2x3x!fir.char<1,4>>>) {
   %1 = fir.slice %c1, %c2, %c1, %c1, %c3, %c1 substr %c1_i64, %c2_i64 : (index, index, index, index, index, index, i64, i64) -> !fir.slice<2>
   %2 = fir.embox %arg0(%0) [%1] : (!fir.ref<!fir.array<2x3x!fir.char<1,4>>>, !fir.shape<2>, !fir.slice<2>) -> !fir.box<!fir.array<?x?x!fir.char<1,?>>>
   // CHECK: %[[addr:.*]] = getelementptr [3 x [2 x [4 x i8]]], ptr %[[arg0]], i64 0, i64 0, i64 0
-  // CHECK: %[[substringAddr:.*]] = getelementptr {{.*}}, ptr %[[addr]], i64 0, i64 1
+  // CHECK: %[[substringAddr:.*]] = getelementptr {{.*}}, ptr %[[addr]], i64 0, i64 0, i64 0, i64 1
   // CHECK: insertvalue {[[descriptorType:.*]]} { ptr undef, i64 2, i32 20180515, i8 2, i8 40, i8 0, i8 0,
   // CHECK-SAME: [2 x [3 x i64]] [{{\[}}3 x i64] [i64 1, i64 2, i64 4], [3 x i64] [i64 1, i64 3, i64 8]] },
   // CHECK-SAME: ptr %[[substringAddr]], 0


        


More information about the flang-commits mailing list