[flang-commits] [flang] [Flang] Hoisting constant-sized allocas at flang codegen. (PR #95310)

Wed Jun 12 14:12:57 PDT 2024

https://github.com/VijayKandiah updated https://github.com/llvm/llvm-project/pull/95310

>From b9d36f33b66d301b004a5337ff7e57fdde9cbb82 Mon Sep 17 00:00:00 2001
From: Vijay Kandiah <vkandiah at sky6.pgi.net>
Date: Wed, 12 Jun 2024 13:19:39 -0700
Subject: [PATCH 1/2] [Flang] Hoisting constant-sized allocas at flang codegen.

---
 .../flang/Optimizer/CodeGen/FIROpPatterns.h   | 11 +++--
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 18 ++++++--
 flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp | 43 ++++++++++---------
 3 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
index 211acdc8a38e6..6ace73e2d16af 100644
--- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
+++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
@@ -51,7 +51,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
   /// appropriate reified structures.
   mlir::Value integerCast(mlir::Location loc,
                           mlir::ConversionPatternRewriter &rewriter,
-                          mlir::Type ty, mlir::Value val) const;
+                          mlir::Type ty, mlir::Value val,
+                          bool fold = false) const;
+  
   struct TypePair {
     mlir::Type fir;
     mlir::Type llvm;
@@ -144,9 +146,10 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
   // Find the Block in which the alloca should be inserted.
   // The order to recursively find the proper block:
   // 1. An OpenMP Op that will be outlined.
-  // 2. A LLVMFuncOp
-  // 3. The first ancestor that is an OpenMP Op or a LLVMFuncOp
-  mlir::Block *getBlockForAllocaInsert(mlir::Operation *op) const;
+  // 2. An OpenMP or OpenACC Op with one or more regions holding executable code.
+  // 3. A LLVMFuncOp
+  // 4. The first ancestor that is one of the above.
+  mlir::Block *getBlockForAllocaInsert(mlir::Operation *op, mlir::Region *parentRegion) const;
 
   // Generate an alloca of size 1 for an object of type \p llvmObjectTy in the
   // allocation address space provided for the architecture in the DataLayout
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 9f21c6b0cf097..d078a000ccd65 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -218,7 +218,7 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
             chrTy.getContext(), chrTy.getFKind());
         llvmObjectType = convertType(rawCharTy);
         assert(end == 1);
-        size = integerCast(loc, rewriter, ity, lenParams[0]);
+        size = integerCast(loc, rewriter, ity, lenParams[0], /*fold=*/true);
       } else if (auto recTy = mlir::dyn_cast<fir::RecordType>(scalarType)) {
         mlir::LLVM::LLVMFuncOp memSizeFn =
             getDependentTypeMemSizeFn(recTy, alloc, rewriter);
@@ -236,17 +236,27 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
       }
     }
     if (auto scaleSize = genAllocationScaleSize(alloc, ity, rewriter))
-      size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
+      size = rewriter.createOrFold<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     if (alloc.hasShapeOperands()) {
       unsigned end = operands.size();
       for (; i < end; ++i)
-        size = rewriter.create<mlir::LLVM::MulOp>(
-            loc, ity, size, integerCast(loc, rewriter, ity, operands[i]));
+        size = rewriter.createOrFold<mlir::LLVM::MulOp>(
+            loc, ity, size,
+            integerCast(loc, rewriter, ity, operands[i], /*fold=*/true));
     }
 
     unsigned allocaAs = getAllocaAddressSpace(rewriter);
     unsigned programAs = getProgramAddressSpace(rewriter);
 
+    if (mlir::isa<mlir::LLVM::ConstantOp>(size.getDefiningOp())) {
+      // Set the Block in which the llvm alloca should be inserted.
+      mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
+      mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
+      mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp, parentRegion);
+      size.getDefiningOp()->moveAfter(insertBlock, insertBlock->begin());
+      rewriter.setInsertionPointAfter(size.getDefiningOp());
+    }
+
     // NOTE: we used to pass alloc->getAttrs() in the builder for non opaque
     // pointers! Only propagate pinned and bindc_name to help debugging, but
     // this should have no functional purpose (and passing the operand segment
diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
index 72e072db37432..6d86879cd3219 100644
--- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
+++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
@@ -65,7 +65,7 @@ mlir::LLVM::ConstantOp ConvertFIRToLLVMPattern::genConstantOffset(
 mlir::Value
 ConvertFIRToLLVMPattern::integerCast(mlir::Location loc,
                                      mlir::ConversionPatternRewriter &rewriter,
-                                     mlir::Type ty, mlir::Value val) const {
+                                     mlir::Type ty, mlir::Value val, bool fold) const {
   auto valTy = val.getType();
   // If the value was not yet lowered, lower its type so that it can
   // be used in getPrimitiveTypeSizeInBits.
@@ -73,10 +73,17 @@ ConvertFIRToLLVMPattern::integerCast(mlir::Location loc,
     valTy = convertType(valTy);
   auto toSize = mlir::LLVM::getPrimitiveTypeSizeInBits(ty);
   auto fromSize = mlir::LLVM::getPrimitiveTypeSizeInBits(valTy);
-  if (toSize < fromSize)
-    return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val);
-  if (toSize > fromSize)
-    return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val);
+  if (fold) {
+    if (toSize < fromSize)
+      return rewriter.createOrFold<mlir::LLVM::TruncOp>(loc, ty, val);
+    if (toSize > fromSize)
+      return rewriter.createOrFold<mlir::LLVM::SExtOp>(loc, ty, val);
+  } else {
+    if (toSize < fromSize)
+      return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val);
+    if (toSize > fromSize)
+      return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val);
+  }
   return val;
 }
 
@@ -274,16 +281,19 @@ mlir::Value ConvertFIRToLLVMPattern::computeBoxSize(
 // Find the Block in which the alloca should be inserted.
 // The order to recursively find the proper block:
 // 1. An OpenMP Op that will be outlined.
-// 2. A LLVMFuncOp
-// 3. The first ancestor that is an OpenMP Op or a LLVMFuncOp
-mlir::Block *
-ConvertFIRToLLVMPattern::getBlockForAllocaInsert(mlir::Operation *op) const {
+// 2. An OpenMP or OpenACC Op with one or more regions holding executable code.
+// 3. A LLVMFuncOp
+// 4. The first ancestor that is one of the above.
+mlir::Block *ConvertFIRToLLVMPattern::getBlockForAllocaInsert(
+    mlir::Operation *op, mlir::Region *parentRegion) const {
   if (auto iface = mlir::dyn_cast<mlir::omp::OutlineableOpenMPOpInterface>(op))
     return iface.getAllocaBlock();
+  if (auto recipeIface = mlir::dyn_cast<mlir::accomp::RecipeInterface>(op))
+    return recipeIface.getAllocaBlock(*parentRegion);
   if (auto llvmFuncOp = mlir::dyn_cast<mlir::LLVM::LLVMFuncOp>(op))
     return &llvmFuncOp.front();
 
-  return getBlockForAllocaInsert(op->getParentOp());
+  return getBlockForAllocaInsert(op->getParentOp(), parentRegion);
 }
 
 // Generate an alloca of size 1 for an object of type \p llvmObjectTy in the
@@ -297,16 +307,9 @@ mlir::Value ConvertFIRToLLVMPattern::genAllocaAndAddrCastWithType(
     mlir::ConversionPatternRewriter &rewriter) const {
   auto thisPt = rewriter.saveInsertionPoint();
   mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
-  if (mlir::isa<mlir::omp::DeclareReductionOp>(parentOp) ||
-      mlir::isa<mlir::omp::PrivateClauseOp>(parentOp)) {
-    // DeclareReductionOp & PrivateClauseOp have multiple child regions. We want
-    // to get the first block of whichever of those regions we are currently in
-    mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
-    rewriter.setInsertionPointToStart(&parentRegion->front());
-  } else {
-    mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
-    rewriter.setInsertionPointToStart(insertBlock);
-  }
+  mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
+  mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp, parentRegion);
+  rewriter.setInsertionPointToStart(insertBlock);
   auto size = genI32Constant(loc, rewriter, 1);
   unsigned allocaAs = getAllocaAddressSpace(rewriter);
   unsigned programAs = getProgramAddressSpace(rewriter);

>From 52e0a94fe2c51915dfbc2f67463aa831427ccce0 Mon Sep 17 00:00:00 2001
From: Vijay Kandiah <vkandiah at sky6.pgi.net>
Date: Wed, 12 Jun 2024 14:12:40 -0700
Subject: [PATCH 2/2] [flang] code-formatting fixes for hoisting alloca pass.

---
 flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h | 8 +++++---
 flang/lib/Optimizer/CodeGen/CodeGen.cpp               | 6 ++++--
 flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp         | 7 +++----
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
index 6ace73e2d16af..ac095664f6188 100644
--- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
+++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
@@ -53,7 +53,7 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
                           mlir::ConversionPatternRewriter &rewriter,
                           mlir::Type ty, mlir::Value val,
                           bool fold = false) const;
-  
+
   struct TypePair {
     mlir::Type fir;
     mlir::Type llvm;
@@ -146,10 +146,12 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
   // Find the Block in which the alloca should be inserted.
   // The order to recursively find the proper block:
   // 1. An OpenMP Op that will be outlined.
-  // 2. An OpenMP or OpenACC Op with one or more regions holding executable code.
+  // 2. An OpenMP or OpenACC Op with one or more regions holding executable
+  // code.
   // 3. A LLVMFuncOp
   // 4. The first ancestor that is one of the above.
-  mlir::Block *getBlockForAllocaInsert(mlir::Operation *op, mlir::Region *parentRegion) const;
+  mlir::Block *getBlockForAllocaInsert(mlir::Operation *op,
+                                       mlir::Region *parentRegion) const;
 
   // Generate an alloca of size 1 for an object of type \p llvmObjectTy in the
   // allocation address space provided for the architecture in the DataLayout
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index d078a000ccd65..4448224024f20 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -236,7 +236,8 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
       }
     }
     if (auto scaleSize = genAllocationScaleSize(alloc, ity, rewriter))
-      size = rewriter.createOrFold<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
+      size =
+          rewriter.createOrFold<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     if (alloc.hasShapeOperands()) {
       unsigned end = operands.size();
       for (; i < end; ++i)
@@ -252,7 +253,8 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
       // Set the Block in which the llvm alloca should be inserted.
       mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
       mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
-      mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp, parentRegion);
+      mlir::Block *insertBlock =
+          getBlockForAllocaInsert(parentOp, parentRegion);
       size.getDefiningOp()->moveAfter(insertBlock, insertBlock->begin());
       rewriter.setInsertionPointAfter(size.getDefiningOp());
     }
diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
index 6d86879cd3219..b9a28b89d9a55 100644
--- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
+++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
@@ -62,10 +62,9 @@ mlir::LLVM::ConstantOp ConvertFIRToLLVMPattern::genConstantOffset(
 /// to the specific target may involve some sign-extending or truncation of
 /// values, particularly to fit them from abstract box types to the
 /// appropriate reified structures.
-mlir::Value
-ConvertFIRToLLVMPattern::integerCast(mlir::Location loc,
-                                     mlir::ConversionPatternRewriter &rewriter,
-                                     mlir::Type ty, mlir::Value val, bool fold) const {
+mlir::Value ConvertFIRToLLVMPattern::integerCast(
+    mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+    mlir::Type ty, mlir::Value val, bool fold) const {
   auto valTy = val.getType();
   // If the value was not yet lowered, lower its type so that it can
   // be used in getPrimitiveTypeSizeInBits.