[Mlir-commits] [mlir] [mlir][acc] Use index for acc.par_width results (PR #187734)

Razvan Lupusoru llvmlistbot at llvm.org
Fri Mar 20 09:46:09 PDT 2026


https://github.com/razvanlupusoru updated https://github.com/llvm/llvm-project/pull/187734

>From 620065dc9b10340f8d8dc617b015072f4810bad5 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Fri, 20 Mar 2026 09:40:25 -0700
Subject: [PATCH 1/2] [mlir][acc] Use index for acc.par_width results

When acc.par_width was introduced in
https://github.com/llvm/llvm-project/pull/184864
there was a discussion on whether to use index or create a new type for
the output of the operation. It was decided to create a new type, but
this means that launch arguments cannot be used directly in the region
(for example, as loop bounds) without a conversion from the new type to
index. To avoid these casts (and the introduction of an actual operation
to perform them), simply restore acc.par_width to produce a result of
index type. This allows its result to be used directly in
acc.compute_region.
---
 mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td    | 12 ++++++------
 mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td |  8 --------
 mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h   |  5 ++++-
 mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp            |  9 +++++++--
 .../OpenACC/Transforms/ACCComputeLowering.cpp        |  4 +++-
 mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp    |  4 ++--
 mlir/test/Dialect/OpenACC/invalid-cg.mlir            |  5 ++---
 .../unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp |  5 +++++
 8 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
index f6ae871eb9936..63fc8476c08d4 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
@@ -249,7 +249,7 @@ def OpenACC_ParWidthOp
   }];
   let arguments = (ins Optional<Index>:$launchArg,
                        OpenACC_GPUParallelDimAttr:$par_dim);
-  let results = (outs OpenACC_ParWidthType:$output);
+  let results = (outs Index:$output);
   let assemblyFormat = [{
     ($launchArg^)? attr-dict
   }];
@@ -284,10 +284,10 @@ def OpenACC_ComputeRegionOp
     The operation is `IsolatedFromAbove`: all values used inside the
     region must be explicitly captured. Values are captured in two ways:
 
-    - Launch arguments (`launch`): Results of operations that define
-      the parallel launch configuration. These are `!acc.par_width`-typed
-      and become block arguments representing the parallel width for each
-      dimension.
+    - Launch arguments (`launch`): Results of `acc.par_width`
+      operations that define the parallel launch configuration. These
+      become `index`-typed block arguments representing the parallel
+      width for each dimension.
 
     - Input arguments (`ins`): Arbitrary values captured from outside
       the region (data pointers, scalars, etc.). These become block
@@ -316,7 +316,7 @@ def OpenACC_ComputeRegionOp
     ```
   }];
 
-  let arguments = (ins Variadic<OpenACC_ParWidthType>:$launchArgs,
+  let arguments = (ins Variadic<Index>:$launchArgs,
                        Variadic<AnyType>:$inputArgs,
                        Optional<OpenACC_GPUAsyncTokenType>:$stream,
                        StrAttr:$origin,
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
index bba385e69c0f2..117272693d626 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
@@ -33,12 +33,4 @@ def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> {
   }];
 }
 
-def OpenACC_ParWidthType : OpenACC_Type<"ParWidth", "par_width"> {
-  let summary = "parallel width token type";
-  let description = [{
-    Represents a type that is consumed by a compute region in order to
-    capture its parallelism dimensions arguments.
-  }];
-}
-
 #endif // OPENACC_OPS_TYPES
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h
index f72d080858747..bb132f5d02e8c 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h
@@ -38,7 +38,10 @@ std::optional<DataLayout> getDataLayout(Operation *op,
 ///
 /// Creates a new `acc.compute_region` with the given launch arguments and
 /// origin string, then clones the operations from `regionToClone` into its
-/// body. Multi-block regions are wrapped with `scf.execute_region`.
+/// body. Launch operands should be `acc.par_width` results (`index`); the
+/// region entry block gets matching `index` block arguments first, then
+/// arguments for each `ins` operand. Multi-block regions are wrapped with
+/// `scf.execute_region`.
 ///
 /// The `mapping` is used and updated during cloning, allowing callers to
 /// track value correspondences. Optional `output`, `kernelFuncName`,
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
index 7b1cfaa048809..04f8c848c7287 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
@@ -455,6 +455,11 @@ BlockArgument ComputeRegionOp::gpuParWidth(gpu::Processor processor) {
 }
 
 LogicalResult ComputeRegionOp::verify() {
+  for (auto op : getLaunchArgs())
+    if (!op.getDefiningOp<acc::ParWidthOp>())
+      return emitOpError(
+          "launch arguments must be results of acc.par_width operations");
+
   unsigned expectedBlockArgs = getLaunchArgs().size() + getInputArgs().size();
   unsigned actualBlockArgs = getRegion().front().getNumArguments();
   if (expectedBlockArgs != actualBlockArgs)
@@ -531,9 +536,9 @@ ParseResult ComputeRegionOp::parse(OpAsmParser &parser,
   if (succeeded(parser.parseOptionalKeyword("launch"))) {
     if (parser.parseAssignmentList(regionArgs, launchOperands))
       return failure();
-    auto parWidthType = acc::ParWidthType::get(builder.getContext());
+    Type indexType = builder.getIndexType();
     for (size_t i = 0; i < regionArgs.size(); ++i)
-      types.push_back(parWidthType);
+      types.push_back(indexType);
   }
 
   if (succeeded(parser.parseOptionalKeyword("ins"))) {
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp
index db504afafc224..e0b0acff57cae 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp
@@ -25,7 +25,9 @@
 // 1. Compute constructs: acc.parallel, acc.serial, and acc.kernels are
 //    replaced by acc.kernel_environment containing a single acc.compute_region.
 //    Launch arguments (num_gangs, num_workers, vector_length) become
-//    acc.par_width ops and are passed as compute_region launch operands.
+//    acc.par_width ops (each result is `index`) and are passed as
+//    compute_region launch operands (still required to be acc.par_width
+//    results by the compute_region verifier).
 //
 // 2. acc.loop: Converted according to context and attributes:
 //    - Unstructured: body wrapped in scf.execute_region.
diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp
index c5fa50642cade..d18522c3c440f 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp
@@ -78,10 +78,10 @@ ComputeRegionOp buildComputeRegion(Location loc, ValueRange launchArgs,
   assert(mapKeys.size() == inputArgs.size() &&
          "inputArgsToMap must have same size as inputArgs when provided");
 
-  auto parWidthType = ParWidthType::get(rewriter.getContext());
+  Type indexType = rewriter.getIndexType();
   Block *entryBlock = rewriter.createBlock(&computeRegion.getRegion());
   for (size_t i = 0; i < launchArgs.size(); ++i)
-    entryBlock->addArgument(parWidthType, loc);
+    entryBlock->addArgument(indexType, loc);
   for (Value input : inputArgs)
     entryBlock->addArgument(input.getType(), loc);
   for (size_t i = 0; i < inputArgs.size(); ++i)
diff --git a/mlir/test/Dialect/OpenACC/invalid-cg.mlir b/mlir/test/Dialect/OpenACC/invalid-cg.mlir
index f788e6c03bcc9..d218bc505a5ea 100644
--- a/mlir/test/Dialect/OpenACC/invalid-cg.mlir
+++ b/mlir/test/Dialect/OpenACC/invalid-cg.mlir
@@ -22,9 +22,8 @@ scf.parallel (%iv) = (%c0_2) to (%c4_2) step (%c1_2) {
 
 // -----
 
-// expected-note@+1 {{prior use here}}
 %c32 = arith.constant 32 : index
-// expected-error@+1 {{use of value '%c32' expects different type than prior uses: '!acc.par_width' vs 'index'}}
+// expected-error@+1 {{'acc.compute_region' op launch arguments must be results of acc.par_width operations}}
 acc.compute_region launch(%arg0 = %c32) {
   acc.yield
 } {origin = "acc.parallel"}
@@ -38,4 +37,4 @@ acc.compute_region launch(%arg0 = %c32) {
 "acc.compute_region"(%w) <{operandSegmentSizes = array<i32: 1, 0, 0>}> ({
 ^bb0(%arg0: index, %extra: index):
   "acc.yield"() : () -> ()
-}) {origin = "acc.parallel"} : (!acc.par_width) -> ()
+}) {origin = "acc.parallel"} : (index) -> ()
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
index 2940145e40c74..bfbbeb8a5e54d 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/IRMapping.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/IR/OwningOpRef.h"
@@ -145,6 +146,10 @@ TEST_F(OpenACCUtilsCGTest, buildComputeRegionWithLaunchArgs) {
   EXPECT_EQ(cr.getOrigin(), ParallelOp::getOperationName());
   EXPECT_EQ(cr.getLaunchArgs().size(), 1u);
   EXPECT_EQ(cr.getLaunchArgs()[0], pw.getResult());
+  EXPECT_TRUE(llvm::isa<IndexType>(pw.getResult().getType()));
+  ASSERT_FALSE(cr.getRegion().empty());
+  EXPECT_TRUE(llvm::isa<IndexType>(
+      cr.getRegion().front().getArgument(0).getType()));
 
   func::ReturnOp::create(rewriter, loc);
 }

>From 91cd1b1fabf35b4c0cfcb1814accd34996947ed0 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Fri, 20 Mar 2026 09:45:57 -0700
Subject: [PATCH 2/2] Fix format

---
 mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
index bfbbeb8a5e54d..e7e5974ed5c70 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp
@@ -148,8 +148,8 @@ TEST_F(OpenACCUtilsCGTest, buildComputeRegionWithLaunchArgs) {
   EXPECT_EQ(cr.getLaunchArgs()[0], pw.getResult());
   EXPECT_TRUE(llvm::isa<IndexType>(pw.getResult().getType()));
   ASSERT_FALSE(cr.getRegion().empty());
-  EXPECT_TRUE(llvm::isa<IndexType>(
-      cr.getRegion().front().getArgument(0).getType()));
+  EXPECT_TRUE(
+      llvm::isa<IndexType>(cr.getRegion().front().getArgument(0).getType()));
 
   func::ReturnOp::create(rewriter, loc);
 }



More information about the Mlir-commits mailing list