[Mlir-commits] [mlir] [mlir][acc] Use index for acc.par_width results (PR #187734)
Razvan Lupusoru
llvmlistbot at llvm.org
Fri Mar 20 09:46:09 PDT 2026
https://github.com/razvanlupusoru updated https://github.com/llvm/llvm-project/pull/187734
>From 620065dc9b10340f8d8dc617b015072f4810bad5 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Fri, 20 Mar 2026 09:40:25 -0700
Subject: [PATCH 1/2] [mlir][acc] Use index for acc.par_width results
When acc.par_width was introduced in
https://github.com/llvm/llvm-project/pull/184864
there was a discussion on whether to use index or create a new type for
the output of the operation. It was decided to create a new type; but
this means that launch arguments cannot be used directly in the region
such as for loop bounds without a conversion from the new type to index.
In order to avoid the casting operations (and introduction of an actual
operation to do this cast), simply restore acc.par_width to generate
index type. This allows its result to be directly used in
acc.compute_region.
---
mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td | 12 ++++++------
mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td | 8 --------
mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h | 5 ++++-
mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp | 9 +++++++--
.../OpenACC/Transforms/ACCComputeLowering.cpp | 4 +++-
mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp | 4 ++--
mlir/test/Dialect/OpenACC/invalid-cg.mlir | 5 ++---
.../unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp | 5 +++++
8 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
index f6ae871eb9936..63fc8476c08d4 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
@@ -249,7 +249,7 @@ def OpenACC_ParWidthOp
}];
let arguments = (ins Optional<Index>:$launchArg,
OpenACC_GPUParallelDimAttr:$par_dim);
- let results = (outs OpenACC_ParWidthType:$output);
+ let results = (outs Index:$output);
let assemblyFormat = [{
($launchArg^)? attr-dict
}];
@@ -284,10 +284,10 @@ def OpenACC_ComputeRegionOp
The operation is `IsolatedFromAbove`: all values used inside the
region must be explicitly captured. Values are captured in two ways:
- - Launch arguments (`launch`): Results of operations that define
- the parallel launch configuration. These are `!acc.par_width`-typed
- and become block arguments representing the parallel width for each
- dimension.
+ - Launch arguments (`launch`): Results of `acc.par_width`
+ operations that define the parallel launch configuration. These
+ become `index`-typed block arguments representing the parallel
+ width for each dimension.
- Input arguments (`ins`): Arbitrary values captured from outside
the region (data pointers, scalars, etc.). These become block
@@ -316,7 +316,7 @@ def OpenACC_ComputeRegionOp
```
}];
- let arguments = (ins Variadic<OpenACC_ParWidthType>:$launchArgs,
+ let arguments = (ins Variadic<Index>:$launchArgs,
Variadic<AnyType>:$inputArgs,
Optional<OpenACC_GPUAsyncTokenType>:$stream,
StrAttr:$origin,
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
index bba385e69c0f2..117272693d626 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
@@ -33,12 +33,4 @@ def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> {
}];
}
-def OpenACC_ParWidthType : OpenACC_Type<"ParWidth", "par_width"> {
- let summary = "parallel width token type";
- let description = [{
- Represents a type that is consumed by a compute region in order to
- capture its parallelism dimensions arguments.
- }];
-}
-
#endif // OPENACC_OPS_TYPES
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h
index f72d080858747..bb132f5d02e8c 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h
@@ -38,7 +38,10 @@ std::optional<DataLayout> getDataLayout(Operation *op,
///
/// Creates a new `acc.compute_region` with the given launch arguments and
/// origin string, then clones the operations from `regionToClone` into its
-/// body. Multi-block regions are wrapped with `scf.execute_region`.
+/// body. Launch operands should be `acc.par_width` results (`index`); the
+/// region entry block gets matching `index` block arguments first, then
+/// arguments for each `ins` operand. Multi-block regions are wrapped with
+/// `scf.execute_region`.
///
/// The `mapping` is used and updated during cloning, allowing callers to
/// track value correspondences. Optional `output`, `kernelFuncName`,
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
index 7b1cfaa048809..04f8c848c7287 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
@@ -455,6 +455,11 @@ BlockArgument ComputeRegionOp::gpuParWidth(gpu::Processor processor) {
}
LogicalResult ComputeRegionOp::verify() {
+ for (auto op : getLaunchArgs())
+ if (!op.getDefiningOp<acc::ParWidthOp>())
+ return emitOpError(
+ "launch arguments must be results of acc.par_width operations");
+
unsigned expectedBlockArgs = getLaunchArgs().size() + getInputArgs().size();
unsigned actualBlockArgs = getRegion().front().getNumArguments();
if (expectedBlockArgs != actualBlockArgs)
@@ -531,9 +536,9 @@ ParseResult ComputeRegionOp::parse(OpAsmParser &parser,
if (succeeded(parser.parseOptionalKeyword("launch"))) {
if (parser.parseAssignmentList(regionArgs, launchOperands))
return failure();
- auto parWidthType = acc::ParWidthType::get(builder.getContext());
+ Type indexType = builder.getIndexType();
for (size_t i = 0; i < regionArgs.size(); ++i)
- types.push_back(parWidthType);
+ types.push_back(indexType);
}
if (succeeded(parser.parseOptionalKeyword("ins"))) {
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp
index db504afafc224..e0b0acff57cae 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp
@@ -25,7 +25,9 @@
// 1. Compute constructs: acc.parallel, acc.serial, and acc.kernels are
// replaced by acc.kernel_environment containing a single acc.compute_region.
// Launch arguments (num_gangs, num_workers, vector_length) become
-// acc.par_width ops and are passed as compute_region launch operands.
+// acc.par_width ops (each result is `index`) and are passed as
+// compute_region launch operands (still required to be acc.par_width
+// results by the compute_region verifier).
//
// 2. acc.loop: Converted according to context and attributes:
// - Unstructured: body wrapped in scf.execute_region.
diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp
index c5fa50642cade..d18522c3c440f 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp
@@ -78,10 +78,10 @@ ComputeRegionOp buildComputeRegion(Location loc, ValueRange launchArgs,
assert(mapKeys.size() == inputArgs.size() &&
"inputArgsToMap must have same size as inputArgs when provided");
- auto parWidthType = ParWidthType::get(rewriter.getContext());
+ Type indexType = rewriter.getIndexType();
Block *entryBlock = rewriter.createBlock(&computeRegion.getRegion());
for (size_t i = 0; i < launchArgs.size(); ++i)
- entryBlock->addArgument(parWidthType, loc);
+ entryBlock->addArgument(indexType, loc);
for (Value input : inputArgs)
entryBlock->addArgument(input.getType(), loc);
for (size_t i = 0; i < inputArgs.size(); ++i)
diff --git a/mlir/test/Dialect/OpenACC/invalid-cg.mlir b/mlir/test/Dialect/OpenACC/invalid-cg.mlir
index f788e6c03bcc9..d218bc505a5ea 100644
--- a/mlir/test/Dialect/OpenACC/invalid-cg.mlir
+++ b/mlir/test/Dialect/OpenACC/invalid-cg.mlir
@@ -22,9 +22,8 @@ scf.parallel (%iv) = (%c0_2) to (%c4_2) step (%c1_2) {
// -----
-// expected-note at +1 {{prior use here}}
%c32 = arith.constant 32 : index
-// expected-error at +1 {{use of value '%c32' expects different type than prior uses: '!acc.par_width' vs 'index'}}
+// expected-error at +1 {{'acc.compute_region' op launch arguments must be results of acc.par_width operations}}
acc.compute_region launch(%arg0 = %c32) {
acc.yield
} {origin = "acc.parallel"}
@@ -38,4 +37,4 @@ acc.compute_region launch(%arg0 = %c32) {
"acc.compute_region"(%w) <{operandSegmentSizes = array<i32: 1, 0, 0>}> ({
^bb0(%arg0: index, %extra: index):
"acc.yield"() : () -> ()
-}) {origin = "acc.parallel"} : (!acc.par_width) -> ()
+}) {origin = "acc.parallel"} : (index) -> ()
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
index 2940145e40c74..bfbbeb8a5e54d 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
@@ -14,6 +14,7 @@
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OwningOpRef.h"
@@ -145,6 +146,10 @@ TEST_F(OpenACCUtilsCGTest, buildComputeRegionWithLaunchArgs) {
EXPECT_EQ(cr.getOrigin(), ParallelOp::getOperationName());
EXPECT_EQ(cr.getLaunchArgs().size(), 1u);
EXPECT_EQ(cr.getLaunchArgs()[0], pw.getResult());
+ EXPECT_TRUE(llvm::isa<IndexType>(pw.getResult().getType()));
+ ASSERT_FALSE(cr.getRegion().empty());
+ EXPECT_TRUE(llvm::isa<IndexType>(
+ cr.getRegion().front().getArgument(0).getType()));
func::ReturnOp::create(rewriter, loc);
}
>From 91cd1b1fabf35b4c0cfcb1814accd34996947ed0 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Fri, 20 Mar 2026 09:45:57 -0700
Subject: [PATCH 2/2] Fix format
---
mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
index bfbbeb8a5e54d..e7e5974ed5c70 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
@@ -148,8 +148,8 @@ TEST_F(OpenACCUtilsCGTest, buildComputeRegionWithLaunchArgs) {
EXPECT_EQ(cr.getLaunchArgs()[0], pw.getResult());
EXPECT_TRUE(llvm::isa<IndexType>(pw.getResult().getType()));
ASSERT_FALSE(cr.getRegion().empty());
- EXPECT_TRUE(llvm::isa<IndexType>(
- cr.getRegion().front().getArgument(0).getType()));
+ EXPECT_TRUE(
+ llvm::isa<IndexType>(cr.getRegion().front().getArgument(0).getType()));
func::ReturnOp::create(rewriter, loc);
}
More information about the Mlir-commits
mailing list