[Mlir-commits] [clang] [mlir] [OpenACC][CIR] Implement 'gang' lowering on `routine' (PR #170506)
Erich Keane
llvmlistbot at llvm.org
Wed Dec 3 08:35:02 PST 2025
https://github.com/erichkeane created https://github.com/llvm/llvm-project/pull/170506
This is a bit more work than the worker/vector/seq in that gang takes an optional `dim` argument. The argument is always 1, 2, or 3 (constants!), and the other argument-types that gang allows elsewhere aren't valid here.
For the IR, we had to add 2 overloads of `addGang`. The first just adds the 'valueless' one, which can just add to the one ArrayAttr. The second has to add to TWO lists.
Note: The standard limits to only 1 `gang` per construct. We decided after evaluating it, that it really means 'per device-type region'. However, device_type isn't implemented yet, so we'll add tests for that when we do.
At the moment, we added the device_type infrastructure however.
>From 674299e115792330d14e988e76a202f91879abc9 Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane at nvidia.com>
Date: Wed, 3 Dec 2025 06:37:57 -0800
Subject: [PATCH] [OpenACC][CIR] Implement 'gang' lowering on `routine'
This is a bit more work than the worker/vector/seq in that gang takes an
optional `dim` argument. The argument is always 1, 2, or 3
(constants!), and the other argument-types that gang allows elsewhere
aren't valid here.
For the IR, we had to add 2 overloads of `addGang`. The first just adds
the 'valueless' one, which can just add to the one ArrayAttr. The
second has to add to TWO lists.
Note: The standard limits to only 1 `gang` per construct. We decided
after evaluating it, that it really means 'per device-type region'.
However, device_type isn't implemented yet, so we'll add tests for that
when we do.
At the moment, we added the device_type infrastructure however.
---
clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp | 28 +++++++++++--
.../CIR/CodeGenOpenACC/routine-clauses.cpp | 39 +++++++++++++++++++
.../mlir/Dialect/OpenACC/OpenACCOps.td | 5 +++
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 37 ++++++++++++++++++
4 files changed, 106 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
index a5322ac4e1930..0d76587dd48b1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
@@ -303,14 +303,16 @@ void CIRGenModule::emitGlobalOpenACCRoutineDecl(const OpenACCRoutineDecl *d) {
namespace {
class OpenACCRoutineClauseEmitter final
: public OpenACCClauseVisitor<OpenACCRoutineClauseEmitter> {
+ CIRGenModule &cgm;
CIRGen::CIRGenBuilderTy &builder;
mlir::acc::RoutineOp routineOp;
llvm::SmallVector<mlir::acc::DeviceType> lastDeviceTypeValues;
public:
- OpenACCRoutineClauseEmitter(CIRGen::CIRGenBuilderTy &builder,
+ OpenACCRoutineClauseEmitter(CIRGenModule &cgm,
+ CIRGen::CIRGenBuilderTy &builder,
mlir::acc::RoutineOp routineOp)
- : builder(builder), routineOp(routineOp) {}
+ : cgm(cgm), builder(builder), routineOp(routineOp) {}
void emitClauses(ArrayRef<const OpenACCClause *> clauses) {
this->VisitClauseList(clauses);
@@ -333,6 +335,26 @@ class OpenACCRoutineClauseEmitter final
void VisitNoHostClause(const OpenACCNoHostClause &clause) {
routineOp.setNohost(/*attrValue=*/true);
}
+
+ void VisitGangClause(const OpenACCGangClause &clause) {
+ // Gang has an optional 'dim' value, which is a constant int of 1, 2, or 3.
+ // If we don't store any expressions in the clause, there are none, else we
+ // expect there is 1, since Sema should enforce that the single 'dim' is the
+ // only valid value.
+ if (clause.getNumExprs() == 0) {
+ routineOp.addGang(builder.getContext(), lastDeviceTypeValues);
+ } else {
+ assert(clause.getNumExprs() == 1);
+ auto [kind, expr] = clause.getExpr(0);
+ assert(kind == OpenACCGangKind::Dim);
+
+ llvm::APSInt curValue = expr->EvaluateKnownConstInt(cgm.getASTContext());
+ // The value is 1, 2, or 3, but 64 bit seems right enough.
+ curValue = curValue.sextOrTrunc(64);
+ routineOp.addGang(builder.getContext(), lastDeviceTypeValues,
+ curValue.getZExtValue());
+ }
+ }
};
} // namespace
@@ -373,6 +395,6 @@ void CIRGenModule::emitOpenACCRoutineDecl(
mlir::acc::getRoutineInfoAttrName(),
mlir::acc::RoutineInfoAttr::get(func.getContext(), funcRoutines));
- OpenACCRoutineClauseEmitter emitter{builder, routineOp};
+ OpenACCRoutineClauseEmitter emitter{*this, builder, routineOp};
emitter.emitClauses(clauses);
}
diff --git a/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp b/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp
index 81437e7e02ab1..6500b07ff1eb7 100644
--- a/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp
@@ -18,6 +18,27 @@ void Func5() {}
void Func6() {}
#pragma acc routine(Func6) nohost vector
+#pragma acc routine gang
+void Func7() {}
+
+void Func8() {}
+#pragma acc routine(Func8) gang
+
+#pragma acc routine gang(dim:1)
+void Func9() {}
+
+void Func10() {}
+#pragma acc routine(Func10) gang(dim:3)
+
+constexpr int Value = 2;
+
+#pragma acc routine gang(dim:Value) nohost
+void Func11() {}
+
+
+void Func12() {}
+#pragma acc routine(Func12) nohost gang(dim:Value)
+
// CHECK: cir.func{{.*}} @[[F1_NAME:.*Func1[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F1_R_NAME:.*]]]>}
// CHECK: acc.routine @[[F1_R_NAME]] func(@[[F1_NAME]]) seq nohost
@@ -32,7 +53,25 @@ void Func6() {}
// CHECK: acc.routine @[[F5_R_NAME]] func(@[[F5_NAME]]) vector
// CHECK: cir.func{{.*}} @[[F6_NAME:.*Func6[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F6_R_NAME:.*]]]>}
+//
+// CHECK: cir.func{{.*}} @[[F7_NAME:.*Func7[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F7_R_NAME:.*]]]>}
+// CHECK: acc.routine @[[F7_R_NAME]] func(@[[F7_NAME]]) gang
+//
+// CHECK: cir.func{{.*}} @[[F8_NAME:.*Func8[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F8_R_NAME:.*]]]>}
+//
+// CHECK: cir.func{{.*}} @[[F9_NAME:.*Func9[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F9_R_NAME:.*]]]>}
+// CHECK: acc.routine @[[F9_R_NAME]] func(@[[F9_NAME]]) gang(dim: 1 : i64)
+//
+// CHECK: cir.func{{.*}} @[[F10_NAME:.*Func10[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F10_R_NAME:.*]]]>}
+
+// CHECK: cir.func{{.*}} @[[F11_NAME:.*Func11[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F11_R_NAME:.*]]]>}
+// CHECK: acc.routine @[[F11_R_NAME]] func(@[[F11_NAME]]) gang(dim: 2 : i64)
+//
+// CHECK: cir.func{{.*}} @[[F12_NAME:.*Func12[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F12_R_NAME:.*]]]>}
// CHECK: acc.routine @[[F2_R_NAME]] func(@[[F2_NAME]]) seq
// CHECK: acc.routine @[[F4_R_NAME]] func(@[[F4_NAME]]) worker nohost
// CHECK: acc.routine @[[F6_R_NAME]] func(@[[F6_NAME]]) vector nohost
+// CHECK: acc.routine @[[F8_R_NAME]] func(@[[F8_NAME]]) gang
+// CHECK: acc.routine @[[F10_R_NAME]] func(@[[F10_NAME]]) gang(dim: 3 : i64)
+// CHECK: acc.routine @[[F12_R_NAME]] func(@[[F12_NAME]]) gang(dim: 2 : i64)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 77d1a6f8d53b5..be50d38689218 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -3286,6 +3286,11 @@ def OpenACC_RoutineOp : OpenACC_Op<"routine", [IsolatedFromAbove]> {
void addVector(MLIRContext *, llvm::ArrayRef<DeviceType>);
// Add an entry to the 'worker' attribute for each additional device types.
void addWorker(MLIRContext *, llvm::ArrayRef<DeviceType>);
+ // Add an entry to the 'gang' attribute for each additional device type.
+ void addGang(MLIRContext *, llvm::ArrayRef<DeviceType>);
+ // Add an entry to the 'gang' attribute with a value for each additional
+ // device type.
+ void addGang(MLIRContext *, llvm::ArrayRef<DeviceType>, uint64_t);
}];
let assemblyFormat = [{
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 7039bbe1d11ec..e3614118b5ad6 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -4367,6 +4367,43 @@ void RoutineOp::addWorker(MLIRContext *context,
effectiveDeviceTypes));
}
+void RoutineOp::addGang(MLIRContext *context,
+ llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
+ setGangAttr(addDeviceTypeAffectedOperandHelper(context, getGangAttr(),
+ effectiveDeviceTypes));
+}
+
+void RoutineOp::addGang(MLIRContext *context,
+ llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ uint64_t val) {
+ llvm::SmallVector<mlir::Attribute> dimValues;
+ llvm::SmallVector<mlir::Attribute> deviceTypes;
+
+ if (getGangDimAttr())
+ llvm::copy(getGangDimAttr(), std::back_inserter(dimValues));
+ if (getGangDimDeviceTypeAttr())
+ llvm::copy(getGangDimDeviceTypeAttr(), std::back_inserter(deviceTypes));
+
+ assert(dimValues.size() == deviceTypes.size());
+
+ if (effectiveDeviceTypes.empty()) {
+ dimValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), val));
+ deviceTypes.push_back(
+ acc::DeviceTypeAttr::get(context, acc::DeviceType::None));
+ } else {
+ for (DeviceType dt : effectiveDeviceTypes) {
+ dimValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), val));
+ deviceTypes.push_back(acc::DeviceTypeAttr::get(context, dt));
+ }
+ }
+ assert(dimValues.size() == deviceTypes.size());
+
+ setGangDimAttr(mlir::ArrayAttr::get(context, dimValues));
+ setGangDimDeviceTypeAttr(mlir::ArrayAttr::get(context, deviceTypes));
+}
+
//===----------------------------------------------------------------------===//
// InitOp
//===----------------------------------------------------------------------===//
More information about the Mlir-commits
mailing list