[Mlir-commits] [clang] [mlir] [OpenACC] Implement tile/collapse lowering (PR #138576)
Erich Keane
llvmlistbot at llvm.org
Tue May 6 07:16:28 PDT 2025
https://github.com/erichkeane updated https://github.com/llvm/llvm-project/pull/138576
>From 57c9faf4a0bc4a589f56fee528df8b06bdec7e54 Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane at nvidia.com>
Date: Mon, 5 May 2025 10:16:35 -0700
Subject: [PATCH 1/3] [OpenACC] Implement tile/collapse lowering
These two ended up being pretty similar in frontend implementation, and
fairly trivial when doing lowering. The collapse clause jsut results in
a normal device_type style attribute with some mild additional
complexity, and 'tile' just uses the current infrastructure for 'with
segments'.
---
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 58 +++++++++++++
clang/test/CIR/CodeGenOpenACC/loop.cpp | 84 +++++++++++++++++++
.../mlir/Dialect/OpenACC/OpenACCOps.td | 10 +++
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 51 +++++++++++
4 files changed, 203 insertions(+)
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index ff0bf6e7f55dd..7223a8ed15bd5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -12,6 +12,7 @@
#include <type_traits>
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
namespace clang {
// Simple type-trait to see if the first template arg is one of the list, so we
@@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final
return conversionOp.getResult(0);
}
+ mlir::Value createConstantInt(mlir::Location loc, unsigned width,
+ int64_t value) {
+ mlir::IntegerType ty = mlir::IntegerType::get(
+ &cgf.getMLIRContext(), width,
+ mlir::IntegerType::SignednessSemantics::Signless);
+ auto constOp = builder.create<mlir::arith::ConstantOp>(
+ loc, ty, builder.getIntegerAttr(ty, value));
+
+ return constOp.getResult();
+ }
+
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
// '*' case leaves no identifier-info, just a nullptr.
if (!ii)
@@ -336,6 +348,52 @@ class OpenACCClauseCIREmitter final
return clauseNotImplemented(clause);
}
}
+
+ void VisitCollapseClause(const OpenACCCollapseClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::APInt value =
+ clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+
+ if (value.getBitWidth() != 64)
+ value = value.sext(64);
+
+ operation.setCollapseForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, value);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
+
+ void VisitTileClause(const OpenACCTileClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::SmallVector<mlir::Value> values;
+
+ for (const Expr *e : clause.getSizeExprs()) {
+ mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc());
+
+ // We represent the * as -1. Additionally, this is a constant, so we
+ // can always just emit it as 64 bits to avoid having to do any more
+ // work to determine signedness or size.
+ if (isa<OpenACCAsteriskSizeExpr>(e)) {
+ values.push_back(createConstantInt(exprLoc, 64, -1));
+ } else {
+ llvm::APInt curValue =
+ e->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+ values.push_back(
+ createConstantInt(exprLoc, 64, curValue.getSExtValue()));
+ }
+ }
+
+ operation.setTileForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, values);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
};
template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp
index 2757d935e1f76..b255a01adda0e 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp
@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+
+ #pragma acc loop collapse(1) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+
+ #pragma acc loop collapse(1) device_type(radeon) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+
+ #pragma acc loop tile(1, 2, 3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon) tile (1, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
}
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c3df064cf0ead..41b01a14a6498 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>);
// Add an entry to the 'auto' attribute for each additional device types.
void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>);
+
+ // Sets the collapse value for this 'loop' for a set of DeviceTypes. Note
+ // that this may only be set once per DeviceType, and will fail the verifier
+ // if this is set multiple times.
+ void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ llvm::APInt);
+ // Sets the tile values for this 'loop' for a set of DeviceTypes. All of the
+ // values should be integral constants, with the '*' represented as a '-1'.
+ void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ mlir::ValueRange);
}];
let hasCustomAssemblyFormat = 1;
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 39dbb0c92a309..f26b3a5143c0b 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context,
effectiveDeviceTypes));
}
+void acc::LoopOp::setCollapseForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ llvm::APInt value) {
+ llvm::SmallVector<mlir::Attribute> newValues;
+ llvm::SmallVector<mlir::Attribute> newDeviceTypes;
+
+ assert((getCollapseAttr() == nullptr) ==
+ (getCollapseDeviceTypeAttr() == nullptr));
+ assert(value.getBitWidth() == 64);
+
+ if (getCollapseAttr()) {
+ for (const auto &existing :
+ llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) {
+ newValues.push_back(std::get<0>(existing));
+ newDeviceTypes.push_back(std::get<1>(existing));
+ }
+ }
+
+ if (effectiveDeviceTypes.empty()) {
+ // If the effective device-types list is empty, this is before there are any
+ // being applied by device_type, so this should be added as a 'none'.
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(
+ acc::DeviceTypeAttr::get(context, DeviceType::None));
+ } else {
+ for (DeviceType DT : effectiveDeviceTypes) {
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(acc::DeviceTypeAttr::get(context, DT));
+ }
+ }
+
+ setCollapseAttr(ArrayAttr::get(context, newValues));
+ setCollapseDeviceTypeAttr(ArrayAttr::get(context, newDeviceTypes));
+}
+
+void acc::LoopOp::setTileForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ ValueRange values) {
+ llvm::SmallVector<int32_t> segments;
+ if (getTileOperandsSegments())
+ llvm::copy(*getTileOperandsSegments(), std::back_inserter(segments));
+
+ setTileOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
+ context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values,
+ getTileOperandsMutable(), segments));
+
+ setTileOperandsSegments(segments);
+}
+
//===----------------------------------------------------------------------===//
// DataOp
//===----------------------------------------------------------------------===//
>From a105da110a4c9179d5e99f8919290213d4b0dd66 Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane at nvidia.com>
Date: Tue, 6 May 2025 06:25:47 -0700
Subject: [PATCH 2/3] Be more tolerant of constant integral sizes
---
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index 7223a8ed15bd5..69025d038d02c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -354,9 +354,7 @@ class OpenACCClauseCIREmitter final
llvm::APInt value =
clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext());
- if (value.getBitWidth() != 64)
- value = value.sext(64);
-
+ value = value.sextOrTrunc(64);
operation.setCollapseForDeviceTypes(builder.getContext(),
lastDeviceTypeValues, value);
} else {
@@ -381,8 +379,8 @@ class OpenACCClauseCIREmitter final
} else {
llvm::APInt curValue =
e->EvaluateKnownConstInt(cgf.cgm.getASTContext());
- values.push_back(
- createConstantInt(exprLoc, 64, curValue.getSExtValue()));
+ values.push_back(createConstantInt(
+ exprLoc, 64, curValue.sextOrTrunc(64).getSExtValue()));
}
}
>From e7646cc7b8d01ba2ed4c5a8d6209c76d9f382d47 Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane at nvidia.com>
Date: Tue, 6 May 2025 07:14:51 -0700
Subject: [PATCH 3/3] Remove unneeded type specification
---
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index 69025d038d02c..fa4ce5efc39ad 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -89,7 +89,7 @@ class OpenACCClauseCIREmitter final
&cgf.getMLIRContext(), width,
mlir::IntegerType::SignednessSemantics::Signless);
auto constOp = builder.create<mlir::arith::ConstantOp>(
- loc, ty, builder.getIntegerAttr(ty, value));
+ loc, builder.getIntegerAttr(ty, value));
return constOp.getResult();
}
More information about the Mlir-commits
mailing list