[flang-commits] [flang] 7ffeaf0 - [MLIR][Flang][OpenMP] Implement lowering simd aligned to MLIR (#95198)

Thu Jun 13 21:07:42 PDT 2024

Author: harishch4
Date: 2024-06-14T09:37:38+05:30
New Revision: 7ffeaf0e187b41994f63ae82e73e123b942cd16b

URL: https://github.com/llvm/llvm-project/commit/7ffeaf0e187b41994f63ae82e73e123b942cd16b
DIFF: https://github.com/llvm/llvm-project/commit/7ffeaf0e187b41994f63ae82e73e123b942cd16b.diff

LOG: [MLIR][Flang][OpenMP] Implement lowering simd aligned to MLIR (#95198)

Rebased @DominikAdamski patch: https://reviews.llvm.org/D142722

---------

Co-authored-by: Dominik Adamski <dominik.adamski at amd.com>
Co-authored-by: Tom Eccles <t at freedommail.info>

Added: 
    flang/test/Lower/OpenMP/simd_aarch64.f90
    flang/test/Lower/OpenMP/simd_x86_64.f90

Modified: 
    flang/lib/Lower/OpenMP/ClauseProcessor.cpp
    flang/lib/Lower/OpenMP/ClauseProcessor.h
    flang/lib/Lower/OpenMP/OpenMP.cpp
    flang/test/Lower/OpenMP/simd.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 371fe6db01255..27eea2b133b3c 100644

--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -16,6 +16,7 @@
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Parser/tools.h"
 #include "flang/Semantics/tools.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 
 namespace Fortran {
 namespace lower {
@@ -514,6 +515,65 @@ bool ClauseProcessor::processUntied(mlir::omp::UntiedClauseOps &result) const {
 //===----------------------------------------------------------------------===//
 // ClauseProcessor repeatable clauses
 //===----------------------------------------------------------------------===//
+static llvm::StringMap<bool> getTargetFeatures(mlir::ModuleOp module) {
+  llvm::StringMap<bool> featuresMap;
+  llvm::SmallVector<llvm::StringRef> targetFeaturesVec;
+  if (mlir::LLVM::TargetFeaturesAttr features =
+          fir::getTargetFeatures(module)) {
+    llvm::ArrayRef<mlir::StringAttr> featureAttrs = features.getFeatures();
+    for (auto &featureAttr : featureAttrs) {
+      llvm::StringRef featureKeyString = featureAttr.strref();
+      featuresMap[featureKeyString.substr(1)] = (featureKeyString[0] == '+');
+    }
+  }
+  return featuresMap;
+}
+
+static void
+addAlignedClause(lower::AbstractConverter &converter,
+                 const omp::clause::Aligned &clause,
+                 llvm::SmallVectorImpl<mlir::Value> &alignedVars,
+                 llvm::SmallVectorImpl<mlir::Attribute> &alignmentAttrs) {
+  using Aligned = omp::clause::Aligned;
+  lower::StatementContext stmtCtx;
+  mlir::IntegerAttr alignmentValueAttr;
+  int64_t alignment = 0;
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+
+  if (auto &alignmentValueParserExpr =
+          std::get<std::optional<Aligned::Alignment>>(clause.t)) {
+    mlir::Value operand = fir::getBase(
+        converter.genExprValue(*alignmentValueParserExpr, stmtCtx));
+    alignment = *fir::getIntIfConstant(operand);
+  } else {
+    llvm::StringMap<bool> featuresMap = getTargetFeatures(builder.getModule());
+    llvm::Triple triple = fir::getTargetTriple(builder.getModule());
+    alignment =
+        llvm::OpenMPIRBuilder::getOpenMPDefaultSimdAlign(triple, featuresMap);
+  }
+
+  // The default alignment for some targets is equal to 0.
+  // Do not generate alignment assumption if alignment is less than or equal to
+  // 0.
+  if (alignment > 0) {
+    auto &objects = std::get<omp::ObjectList>(clause.t);
+    if (!objects.empty())
+      genObjectList(objects, converter, alignedVars);
+    alignmentValueAttr = builder.getI64IntegerAttr(alignment);
+    // All the list items in a aligned clause will have same alignment
+    for (std::size_t i = 0; i < objects.size(); i++)
+      alignmentAttrs.push_back(alignmentValueAttr);
+  }
+}
+
+bool ClauseProcessor::processAligned(
+    mlir::omp::AlignedClauseOps &result) const {
+  return findRepeatableClause<omp::clause::Aligned>(
+      [&](const omp::clause::Aligned &clause, const parser::CharBlock &) {
+        addAlignedClause(converter, clause, result.alignedVars,
+                         result.alignmentAttrs);
+      });
+}
 
 bool ClauseProcessor::processAllocate(
     mlir::omp::AllocateClauseOps &result) const {

diff  --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index e8b06a703fc03..5c9ab8baf82dd 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -90,6 +90,7 @@ class ClauseProcessor {
   bool processUntied(mlir::omp::UntiedClauseOps &result) const;
 
   // 'Repeatable' clauses: They can appear multiple times in the clause list.
+  bool processAligned(mlir::omp::AlignedClauseOps &result) const;
   bool processAllocate(mlir::omp::AllocateClauseOps &result) const;
   bool processCopyin() const;
   bool processCopyprivate(mlir::Location currentLocation,
@@ -140,7 +141,6 @@ class ClauseProcessor {
   template <typename T>
   bool processMotionClauses(lower::StatementContext &stmtCtx,
                             mlir::omp::MapClauseOps &result);
-
   // Call this method for these clauses that should be supported but are not
   // implemented yet. It triggers a compilation error if any of the given
   // clauses is found.

diff  --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9a8211711123e..aac22f0faad37 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1070,15 +1070,15 @@ static void genSimdClauses(lower::AbstractConverter &converter,
                            const List<Clause> &clauses, mlir::Location loc,
                            mlir::omp::SimdClauseOps &clauseOps) {
   ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processAligned(clauseOps);
   cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps);
   cp.processReduction(loc, clauseOps);
   cp.processSafelen(clauseOps);
   cp.processSimdlen(clauseOps);
-  // TODO Support delayed privatization.
 
-  cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear,
-                 clause::Nontemporal, clause::Order>(
-      loc, llvm::omp::Directive::OMPD_simd);
+  // TODO Support delayed privatization.
+  cp.processTODO<clause::Allocate, clause::Linear, clause::Nontemporal,
+                 clause::Order>(loc, llvm::omp::Directive::OMPD_simd);
 }
 
 static void genSingleClauses(lower::AbstractConverter &converter,

diff  --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90
index 223b248b79348..e98136dd57b0a 100644
--- a/flang/test/Lower/OpenMP/simd.f90
+++ b/flang/test/Lower/OpenMP/simd.f90
@@ -182,3 +182,44 @@ subroutine simd_with_collapse_clause(n)
   end do
   !$OMP END SIMD
 end subroutine
+
+
+!CHECK: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref
+!CHECK-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr
+!CHECK-SAME: {__address:i64}>> {fir.bindc_name = "a"}) {
+!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0
+!CHECK-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} :
+!CHECK-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.dscope) ->
+!CHECK-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>,
+!CHECK-SAME: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+subroutine simdloop_aligned_cptr( A)
+  use iso_c_binding
+  integer :: i
+  type (c_ptr) :: A
+!CHECK: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref
+!CHECK-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+!CHECK-SAME: -> 256 : i64)
+  !$OMP SIMD ALIGNED(A:256)
+  do i = 1, 10
+    call c_test_call(A)
+  end do
+  !$OMP END SIMD
+end subroutine
+
+!CHECK-LABEL: func @_QPsimdloop_aligned_allocatable
+subroutine simdloop_aligned_allocatable()
+  integer :: i
+  integer, allocatable :: A(:)
+  allocate(A(10))
+!CHECK: %[[A_PTR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "a",
+!CHECK-SAME: uniq_name = "_QFsimdloop_aligned_allocatableEa"}
+!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_PTR]] {fortran_attrs = #fir.var_attrs<allocatable>,
+!CHECK-SAME: uniq_name = "_QFsimdloop_aligned_allocatableEa"} :
+!CHECK-SAME: (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) ->
+!CHECK-SAME: (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -> 256 : i64)
+  !$OMP SIMD ALIGNED(A:256)
+  do i = 1, 10
+    A(i) = i
+  end do
+end subroutine

diff  --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90
new file mode 100644
index 0000000000000..735237223bcb5
--- /dev/null
+++ b/flang/test/Lower/OpenMP/simd_aarch64.f90
@@ -0,0 +1,16 @@
+! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64
+! The default alignment for AARCH64 is 0 so we do not emit aligned clause
+! REQUIRES: aarch64-registered-target
+! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck  %s
+subroutine simdloop_aligned_cptr(A)
+    use iso_c_binding
+    integer :: i
+    type (c_ptr) :: A
+  !CHECK: omp.simd
+  !CHECK-NOT: aligned(
+    !$OMP SIMD ALIGNED(A)
+    do i = 1, 10
+      call c_test_call(A)
+    end do
+    !$OMP END SIMD
+end subroutine

diff  --git a/flang/test/Lower/OpenMP/simd_x86_64.f90 b/flang/test/Lower/OpenMP/simd_x86_64.f90
new file mode 100644
index 0000000000000..c8cb7970c3222
--- /dev/null
+++ b/flang/test/Lower/OpenMP/simd_x86_64.f90
@@ -0,0 +1,48 @@
+! Tests for 2.9.3.1 Simd and target dependent defult alignment for x86
+! REQUIRES: x86-registered-target
+! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64  %s -o - | FileCheck --check-prefixes=DEFAULT %s
+! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64 -target-feature +avx %s -o - | FileCheck --check-prefixes=AVX %s
+! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64  -target-feature +avx512f  %s -o - | FileCheck --check-prefixes=AVX512F %s
+!DEFAULT: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref
+!DEFAULT-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr
+!DEFAULT-SAME: {__address:i64}>> {fir.bindc_name = "a"}) {
+!DEFAULT:  %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0
+!DEFAULT-SAME:  {uniq_name = "_QFsimdloop_aligned_cptrEa"} :
+!DEFAULT-SAME:  (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.dscope) ->
+!DEFAULT-SAME:  (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>,
+!DEFAULT-SAME:  !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+!AVX: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref
+!AVX-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr
+!AVX-SAME: {__address:i64}>> {fir.bindc_name = "a"}) {
+!AVX:  %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0
+!AVX-SAME:  {uniq_name = "_QFsimdloop_aligned_cptrEa"} :
+!AVX-SAME:  (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.dscope) ->
+!AVX-SAME:  (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>,
+!AVX-SAME:  !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+!AVX512F: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref
+!AVX512F-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr
+!AVX512F-SAME: {__address:i64}>> {fir.bindc_name = "a"}) {
+!AVX512F:  %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0
+!AVX512F-SAME:  {uniq_name = "_QFsimdloop_aligned_cptrEa"} :
+!AVX512F-SAME:  (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.dscope) ->
+!AVX512F-SAME:  (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>,
+!AVX512F-SAME:  !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+subroutine simdloop_aligned_cptr(A)
+    use iso_c_binding
+    integer :: i
+    type (c_ptr) :: A
+    !DEFAULT: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref
+    !DEFAULT-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+    !DEFAULT-SAME: -> 128 : i64)
+    !AVX: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref
+    !AVX-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+    !AVX-SAME: -> 256 : i64)
+    !AVX512F: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref
+    !AVX512F-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+    !AVX512F-SAME: -> 512 : i64)
+    !$OMP SIMD ALIGNED(A)
+    do i = 1, 10
+        call c_test_call(A)
+    end do
+    !$OMP END SIMD
+end subroutine