[clang] [mlir] [OpenACC] Implement tile/collapse lowering (PR #138576)

via cfe-commits cfe-commits at lists.llvm.org
Mon May 5 13:21:13 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clangir

Author: Erich Keane (erichkeane)

<details>
<summary>Changes</summary>

These two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering.  The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'.

---
Full diff: https://github.com/llvm/llvm-project/pull/138576.diff


4 Files Affected:

- (modified) clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h (+58) 
- (modified) clang/test/CIR/CodeGenOpenACC/loop.cpp (+84) 
- (modified) mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td (+10) 
- (modified) mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp (+51) 


``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index ff0bf6e7f55dd..7223a8ed15bd5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -12,6 +12,7 @@
 
 #include <type_traits>
 
+#include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 namespace clang {
 // Simple type-trait to see if the first template arg is one of the list, so we
@@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final
     return conversionOp.getResult(0);
   }
 
+  mlir::Value createConstantInt(mlir::Location loc, unsigned width,
+                                int64_t value) {
+    mlir::IntegerType ty = mlir::IntegerType::get(
+        &cgf.getMLIRContext(), width,
+        mlir::IntegerType::SignednessSemantics::Signless);
+    auto constOp = builder.create<mlir::arith::ConstantOp>(
+        loc, ty, builder.getIntegerAttr(ty, value));
+
+    return constOp.getResult();
+  }
+
   mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
     // '*' case leaves no identifier-info, just a nullptr.
     if (!ii)
@@ -336,6 +348,52 @@ class OpenACCClauseCIREmitter final
       return clauseNotImplemented(clause);
     }
   }
+
+  void VisitCollapseClause(const OpenACCCollapseClause &clause) {
+    if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+      llvm::APInt value =
+          clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+
+      if (value.getBitWidth() != 64)
+        value = value.sext(64);
+
+      operation.setCollapseForDeviceTypes(builder.getContext(),
+                                          lastDeviceTypeValues, value);
+    } else {
+      // TODO: When we've implemented this for everything, switch this to an
+      // unreachable. Combined constructs remain.
+      return clauseNotImplemented(clause);
+    }
+  }
+
+  void VisitTileClause(const OpenACCTileClause &clause) {
+    if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+      llvm::SmallVector<mlir::Value> values;
+
+      for (const Expr *e : clause.getSizeExprs()) {
+        mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc());
+
+        // We represent the * as -1.  Additionally, this is a constant, so we
+        // can always just emit it as 64 bits to avoid having to do any more
+        // work to determine signedness or size.
+        if (isa<OpenACCAsteriskSizeExpr>(e)) {
+          values.push_back(createConstantInt(exprLoc, 64, -1));
+        } else {
+          llvm::APInt curValue =
+              e->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+          values.push_back(
+              createConstantInt(exprLoc, 64, curValue.getSExtValue()));
+        }
+      }
+
+      operation.setTileForDeviceTypes(builder.getContext(),
+                                      lastDeviceTypeValues, values);
+    } else {
+      // TODO: When we've implemented this for everything, switch this to an
+      // unreachable. Combined constructs remain.
+      return clauseNotImplemented(clause);
+    }
+  }
 };
 
 template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp
index 2757d935e1f76..b255a01adda0e 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp
@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
   // CHECK: acc.loop {
   // CHECK: acc.yield
   // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+
+  #pragma acc loop collapse(1) device_type(radeon)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK: acc.loop {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+
+  #pragma acc loop collapse(1) device_type(radeon) collapse (2)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK: acc.loop {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+
+  #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK: acc.loop {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+  #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK: acc.loop {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+
+  #pragma acc loop tile(1, 2, 3)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+  // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+  // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } loc
+  #pragma acc loop tile(2) device_type(radeon)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+  // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } loc
+  #pragma acc loop tile(2) device_type(radeon) tile (1, *)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+  // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } loc
+  #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+  // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } loc
+  #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
+  for(unsigned I = 0; I < N; ++I)
+    for(unsigned J = 0; J < N; ++J)
+      for(unsigned K = 0; K < N; ++K);
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+  // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+  // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+  // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
+  // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
+  // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } loc
+
 }
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c3df064cf0ead..41b01a14a6498 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
     void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>);
     // Add an entry to the 'auto' attribute for each additional device types.
     void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>);
+
+    // Sets the collapse value for this 'loop' for a set of DeviceTypes. Note
+    // that this may only be set once per DeviceType, and will fail the verifier
+    // if this is set multiple times.
+    void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+                                   llvm::APInt);
+    // Sets the tile values for this 'loop' for a set of DeviceTypes. All of the
+    // values should be integral constants, with the '*' represented as a '-1'.
+    void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+                               mlir::ValueRange);
   }];
 
   let hasCustomAssemblyFormat = 1;
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 39dbb0c92a309..f26b3a5143c0b 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context,
                                                   effectiveDeviceTypes));
 }
 
+void acc::LoopOp::setCollapseForDeviceTypes(
+    MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+    llvm::APInt value) {
+  llvm::SmallVector<mlir::Attribute> newValues;
+  llvm::SmallVector<mlir::Attribute> newDeviceTypes;
+
+  assert((getCollapseAttr() == nullptr) ==
+         (getCollapseDeviceTypeAttr() == nullptr));
+  assert(value.getBitWidth() == 64);
+
+  if (getCollapseAttr()) {
+    for (const auto &existing :
+         llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) {
+      newValues.push_back(std::get<0>(existing));
+      newDeviceTypes.push_back(std::get<1>(existing));
+    }
+  }
+
+  if (effectiveDeviceTypes.empty()) {
+    // If the effective device-types list is empty, this is before there are any
+    // being applied by device_type, so this should be added as a 'none'.
+    newValues.push_back(
+        mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+    newDeviceTypes.push_back(
+        acc::DeviceTypeAttr::get(context, DeviceType::None));
+  } else {
+    for (DeviceType DT : effectiveDeviceTypes) {
+      newValues.push_back(
+          mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+      newDeviceTypes.push_back(acc::DeviceTypeAttr::get(context, DT));
+    }
+  }
+
+  setCollapseAttr(ArrayAttr::get(context, newValues));
+  setCollapseDeviceTypeAttr(ArrayAttr::get(context, newDeviceTypes));
+}
+
+void acc::LoopOp::setTileForDeviceTypes(
+    MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+    ValueRange values) {
+  llvm::SmallVector<int32_t> segments;
+  if (getTileOperandsSegments())
+    llvm::copy(*getTileOperandsSegments(), std::back_inserter(segments));
+
+  setTileOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
+      context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values,
+      getTileOperandsMutable(), segments));
+
+  setTileOperandsSegments(segments);
+}
+
 //===----------------------------------------------------------------------===//
 // DataOp
 //===----------------------------------------------------------------------===//

``````````

</details>


https://github.com/llvm/llvm-project/pull/138576


More information about the cfe-commits mailing list