[flang-commits] [flang] f5efa18 - [Flang][MLIR][OpenMP] Add support for simdlen clause

Thu Jul 28 08:49:50 PDT 2022

Author: Prabhdeep Singh Soni
Date: 2022-07-28T23:49:17+08:00
New Revision: f5efa1892e35d15695b8ae9474af3ce8e1d5b8e2

URL: https://github.com/llvm/llvm-project/commit/f5efa1892e35d15695b8ae9474af3ce8e1d5b8e2
DIFF: https://github.com/llvm/llvm-project/commit/f5efa1892e35d15695b8ae9474af3ce8e1d5b8e2.diff

LOG: [Flang][MLIR][OpenMP] Add support for simdlen clause

This supports lowering from parse-tree to MLIR and translation from
MLIR to LLVM IR using OMPIRBuilder for OpenMP simdlen clause in SIMD
construct.

Reviewed By: shraiysh, peixin, arnamoy10

Differential Revision: https://reviews.llvm.org/D130195

Added: 
    

Modified: 
    flang/lib/Lower/OpenMP.cpp
    flang/test/Lower/OpenMP/simd.f90
    mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
    mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
    mlir/test/Dialect/OpenMP/invalid.mlir
    mlir/test/Dialect/OpenMP/ops.mlir
    mlir/test/Target/LLVMIR/openmp-llvm.mlir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 0016d8c77ea66..30aa5b14c2698 100644

--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -906,6 +906,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
   mlir::Value scheduleChunkClauseOperand, ifClauseOperand;
   mlir::Attribute scheduleClauseOperand, noWaitClauseOperand,
       orderedClauseOperand, orderClauseOperand;
+  mlir::IntegerAttr simdlenClauseOperand;
   SmallVector<Attribute> reductionDeclSymbols;
   Fortran::lower::StatementContext stmtCtx;
   const auto &loopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
@@ -1017,6 +1018,13 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         TODO(currentLocation,
              "Reduction of intrinsic procedures is not supported");
       }
+    } else if (const auto &simdlenClause =
+                   std::get_if<Fortran::parser::OmpClause::Simdlen>(
+                       &clause.u)) {
+      const auto *expr = Fortran::semantics::GetExpr(simdlenClause->v);
+      const std::optional<std::int64_t> simdlenVal =
+          Fortran::evaluate::ToInt64(*expr);
+      simdlenClauseOperand = firOpBuilder.getI64IntegerAttr(*simdlenVal);
     }
   }
 
@@ -1038,7 +1046,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
     TypeRange resultType;
     auto SimdLoopOp = firOpBuilder.create<mlir::omp::SimdLoopOp>(
         currentLocation, resultType, lowerBound, upperBound, step,
-        ifClauseOperand, /*inclusive=*/firOpBuilder.getUnitAttr());
+        ifClauseOperand, simdlenClauseOperand,
+        /*inclusive=*/firOpBuilder.getUnitAttr());
     createBodyOfOp<omp::SimdLoopOp>(SimdLoopOp, converter, currentLocation,
                                     eval, &loopOpClauseList, iv);
     return;

diff  --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90
index df4489f9f20cf..c1adf6e70df14 100644
--- a/flang/test/Lower/OpenMP/simd.f90
+++ b/flang/test/Lower/OpenMP/simd.f90
@@ -36,3 +36,56 @@ subroutine simdloop_with_if_clause(n, threshold)
   end do
   !$OMP END SIMD
 end subroutine
+
+!CHECK-LABEL: func @_QPsimdloop_with_simdlen_clause
+subroutine simdloop_with_simdlen_clause(n, threshold)
+integer :: i, n, threshold
+  !$OMP SIMD SIMDLEN(2)
+  ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
+  ! CHECK: %[[UB:.*]] = fir.load %arg0
+  ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
+  do i = 1, n
+    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
+    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) : (!fir.ref<i8>, i32) -> i1
+    print*, i
+  end do
+  !$OMP END SIMD
+end subroutine
+
+!CHECK-LABEL: func @_QPsimdloop_with_simdlen_clause_from_param
+subroutine simdloop_with_simdlen_clause_from_param(n, threshold)
+integer :: i, n, threshold
+integer, parameter :: simdlen = 2;
+  !$OMP SIMD SIMDLEN(simdlen)
+  ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
+  ! CHECK: %[[UB:.*]] = fir.load %arg0
+  ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
+  do i = 1, n
+    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
+    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) : (!fir.ref<i8>, i32) -> i1
+    print*, i
+  end do
+  !$OMP END SIMD
+end subroutine
+
+!CHECK-LABEL: func @_QPsimdloop_with_simdlen_clause_from_expr_from_param
+subroutine simdloop_with_simdlen_clause_from_expr_from_param(n, threshold)
+integer :: i, n, threshold
+integer, parameter :: simdlen = 2;
+  !$OMP SIMD SIMDLEN(simdlen*2 + 2)
+  ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
+  ! CHECK: %[[UB:.*]] = fir.load %arg0
+  ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.simdloop simdlen(6) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
+  do i = 1, n
+    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
+    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) : (!fir.ref<i8>, i32) -> i1
+    print*, i
+  end do
+  !$OMP END SIMD
+end subroutine

diff  --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index cc657d723a332..1e1aff5624d88 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -415,6 +415,9 @@ def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments,
     When an if clause is present and evaluates to false, the preferred number of
     iterations to be executed concurrently is one, regardless of whether
     a simdlen clause is speciﬁed.
+
+    When a simdlen clause is present, the preferred number of iterations to be
+    executed concurrently is the value provided to the simdlen clause.
     ```
     omp.simdloop <clauses>
     for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
@@ -429,12 +432,14 @@ def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments,
              Variadic<IntLikeType>:$upperBound,
              Variadic<IntLikeType>:$step,
              Optional<I1>:$if_expr,
+             Confined<OptionalAttr<I64Attr>, [IntPositive]>:$simdlen,
              UnitAttr:$inclusive
      );
  
   let regions = (region AnyRegion:$region);
   let assemblyFormat = [{
     oilist(`if` `(` $if_expr `)`
+          |`simdlen` `(` $simdlen  `)`
     ) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
                                   type($step), $inclusive) attr-dict
   }];

diff  --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 248a32ec9d673..5dcbd00201a99 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -971,7 +971,11 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::CanonicalLoopInfo *loopInfo =
       ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
 
-  ompBuilder->applySimd(loopInfo, nullptr);
+  llvm::ConstantInt *simdlen = nullptr;
+  if (llvm::Optional<uint64_t> simdlenVar = loop.simdlen())
+    simdlen = builder.getInt64(simdlenVar.getValue());
+
+  ompBuilder->applySimd(loopInfo, simdlen);
 
   builder.restoreIP(afterIP);
   return success();

diff  --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 8408a5ac82d44..6e6277387bd2c 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -205,6 +205,16 @@ func.func @omp_simdloop(%lb : index, %ub : index, %step : i32) -> () {
 
 // -----
 
+func.func @omp_simdloop_pretty_simdlen(%lb : index, %ub : index, %step : index) -> () {
+  // expected-error @below {{op attribute 'simdlen' failed to satisfy constraint: 64-bit signless integer attribute whose value is positive}}
+  omp.simdloop simdlen(0) for (%iv): index = (%lb) to (%ub) step (%step) {
+    omp.yield
+  }
+  return
+}
+
+// -----
+
 // expected-error @below {{op expects initializer region with one argument of the reduction type}}
 omp.reduction.declare @add_f32 : f64
 init {

diff  --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 4511d116de959..6e28324c67846 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -357,6 +357,15 @@ func.func @omp_simdloop_pretty_if(%lb : index, %ub : index, %step : index, %if_c
   return
 }
 
+// CHECK-LABEL: omp_simdloop_pretty_simdlen
+func.func @omp_simdloop_pretty_simdlen(%lb : index, %ub : index, %step : index) -> () {
+  // CHECK: omp.simdloop simdlen(2) for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+  omp.simdloop simdlen(2) for (%iv): index = (%lb) to (%ub) step (%step) {
+    omp.yield
+  }
+  return
+}
+
 // CHECK-LABEL: omp_simdloop_pretty_multiple
 func.func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> () {
   // CHECK: omp.simdloop for (%{{.*}}, %{{.*}}) : index = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})

diff  --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 40ac2e3e28ebe..4b36ed03b549c 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -728,6 +728,28 @@ llvm.func @simdloop_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 :
 
 // -----
 
+// CHECK-LABEL: @simdloop_simple_multiple_simdlen
+llvm.func @simdloop_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr<f32>, %arg1: !llvm.ptr<f32>) {
+  omp.simdloop simdlen(2) for (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+    %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+    // The form of the emitted IR is controlled by OpenMPIRBuilder and
+    // tested there. Just check that the right metadata is added.
+    // CHECK: llvm.access.group
+    // CHECK-NEXT: llvm.access.group
+    %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+    %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+    llvm.store %3, %4 : !llvm.ptr<f32>
+    llvm.store %3, %5 : !llvm.ptr<f32>
+    omp.yield
+  }
+  llvm.return
+}
+// CHECK: llvm.loop.parallel_accesses
+// CHECK-NEXT: llvm.loop.vectorize.enable
+// CHECK-NEXT: llvm.loop.vectorize.width{{.*}}i64 2
+
+// -----
+
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {