[clang] [mlir] [OpenACC] Implement tile/collapse lowering (PR #138576)
via cfe-commits
cfe-commits at lists.llvm.org
Mon May 5 13:21:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clangir
Author: Erich Keane (erichkeane)
<details>
<summary>Changes</summary>
These two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'.
---
Full diff: https://github.com/llvm/llvm-project/pull/138576.diff
4 Files Affected:
- (modified) clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h (+58)
- (modified) clang/test/CIR/CodeGenOpenACC/loop.cpp (+84)
- (modified) mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td (+10)
- (modified) mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp (+51)
``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index ff0bf6e7f55dd..7223a8ed15bd5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -12,6 +12,7 @@
#include <type_traits>
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
namespace clang {
// Simple type-trait to see if the first template arg is one of the list, so we
@@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final
return conversionOp.getResult(0);
}
+ mlir::Value createConstantInt(mlir::Location loc, unsigned width,
+ int64_t value) {
+ mlir::IntegerType ty = mlir::IntegerType::get(
+ &cgf.getMLIRContext(), width,
+ mlir::IntegerType::SignednessSemantics::Signless);
+ auto constOp = builder.create<mlir::arith::ConstantOp>(
+ loc, ty, builder.getIntegerAttr(ty, value));
+
+ return constOp.getResult();
+ }
+
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
// '*' case leaves no identifier-info, just a nullptr.
if (!ii)
@@ -336,6 +348,52 @@ class OpenACCClauseCIREmitter final
return clauseNotImplemented(clause);
}
}
+
+ void VisitCollapseClause(const OpenACCCollapseClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::APInt value =
+ clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+
+ if (value.getBitWidth() != 64)
+ value = value.sext(64);
+
+ operation.setCollapseForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, value);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
+
+ void VisitTileClause(const OpenACCTileClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::SmallVector<mlir::Value> values;
+
+ for (const Expr *e : clause.getSizeExprs()) {
+ mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc());
+
+ // We represent the * as -1. Additionally, this is a constant, so we
+ // can always just emit it as 64 bits to avoid having to do any more
+ // work to determine signedness or size.
+ if (isa<OpenACCAsteriskSizeExpr>(e)) {
+ values.push_back(createConstantInt(exprLoc, 64, -1));
+ } else {
+ llvm::APInt curValue =
+ e->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+ values.push_back(
+ createConstantInt(exprLoc, 64, curValue.getSExtValue()));
+ }
+ }
+
+ operation.setTileForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, values);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
};
template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp
index 2757d935e1f76..b255a01adda0e 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp
@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+
+ #pragma acc loop collapse(1) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+
+ #pragma acc loop collapse(1) device_type(radeon) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+
+ #pragma acc loop tile(1, 2, 3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon) tile (1, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
}
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c3df064cf0ead..41b01a14a6498 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>);
// Add an entry to the 'auto' attribute for each additional device types.
void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>);
+
+ // Sets the collapse value for this 'loop' for a set of DeviceTypes. Note
+ // that this may only be set once per DeviceType, and will fail the verifier
+ // if this is set multiple times.
+ void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ llvm::APInt);
+ // Sets the tile values for this 'loop' for a set of DeviceTypes. All of the
+ // values should be integral constants, with the '*' represented as a '-1'.
+ void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ mlir::ValueRange);
}];
let hasCustomAssemblyFormat = 1;
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 39dbb0c92a309..f26b3a5143c0b 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context,
effectiveDeviceTypes));
}
+void acc::LoopOp::setCollapseForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ llvm::APInt value) {
+ llvm::SmallVector<mlir::Attribute> newValues;
+ llvm::SmallVector<mlir::Attribute> newDeviceTypes;
+
+ assert((getCollapseAttr() == nullptr) ==
+ (getCollapseDeviceTypeAttr() == nullptr));
+ assert(value.getBitWidth() == 64);
+
+ if (getCollapseAttr()) {
+ for (const auto &existing :
+ llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) {
+ newValues.push_back(std::get<0>(existing));
+ newDeviceTypes.push_back(std::get<1>(existing));
+ }
+ }
+
+ if (effectiveDeviceTypes.empty()) {
+ // If the effective device-types list is empty, this is before there are any
+ // being applied by device_type, so this should be added as a 'none'.
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(
+ acc::DeviceTypeAttr::get(context, DeviceType::None));
+ } else {
+ for (DeviceType DT : effectiveDeviceTypes) {
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(acc::DeviceTypeAttr::get(context, DT));
+ }
+ }
+
+ setCollapseAttr(ArrayAttr::get(context, newValues));
+ setCollapseDeviceTypeAttr(ArrayAttr::get(context, newDeviceTypes));
+}
+
+void acc::LoopOp::setTileForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ ValueRange values) {
+ llvm::SmallVector<int32_t> segments;
+ if (getTileOperandsSegments())
+ llvm::copy(*getTileOperandsSegments(), std::back_inserter(segments));
+
+ setTileOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
+ context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values,
+ getTileOperandsMutable(), segments));
+
+ setTileOperandsSegments(segments);
+}
+
//===----------------------------------------------------------------------===//
// DataOp
//===----------------------------------------------------------------------===//
``````````
</details>
https://github.com/llvm/llvm-project/pull/138576
More information about the cfe-commits
mailing list