[clang] b7c521b - [OpenACC][CIR] Lowering for 'vector_length' on compute constructs

Mon Apr 21 12:47:52 PDT 2025

Author: erichkeane
Date: 2025-04-21T12:47:47-07:00
New Revision: b7c521b922f8b81544ecb0ccff2847644cac3107

URL: https://github.com/llvm/llvm-project/commit/b7c521b922f8b81544ecb0ccff2847644cac3107
DIFF: https://github.com/llvm/llvm-project/commit/b7c521b922f8b81544ecb0ccff2847644cac3107.diff

LOG: [OpenACC][CIR] Lowering for 'vector_length' on compute constructs

This is the same as the 'num_workers', with slightly different names in
places, so we just do the same exact implementation.  This extracts the
implementation as well, which should make it easier to reuse.

Added: 
    

Modified: 
    clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
    clang/test/CIR/CodeGenOpenACC/kernels.c
    clang/test/CIR/CodeGenOpenACC/parallel.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index b79baa96a3fc3..e7dd2e74b0864 100644

--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -82,6 +82,56 @@ class OpenACCClauseCIREmitter final
     return conversionOp.getResult(0);
   }
 
+  mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
+    // '*' case leaves no identifier-info, just a nullptr.
+    if (!ii)
+      return mlir::acc::DeviceType::Star;
+    return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName())
+        .CaseLower("default", mlir::acc::DeviceType::Default)
+        .CaseLower("host", mlir::acc::DeviceType::Host)
+        .CaseLower("multicore", mlir::acc::DeviceType::Multicore)
+        .CasesLower("nvidia", "acc_device_nvidia",
+                    mlir::acc::DeviceType::Nvidia)
+        .CaseLower("radeon", mlir::acc::DeviceType::Radeon);
+  }
+
+  // Handle a clause affected by the 'device-type' to the point that they need
+  // to have the attributes added in the correct/corresponding order, such as
+  // 'num_workers' or 'vector_length' on a compute construct.
+  mlir::ArrayAttr
+  handleDeviceTypeAffectedClause(mlir::ArrayAttr existingDeviceTypes,
+                                 mlir::Value argument,
+                                 mlir::MutableOperandRange &argCollection) {
+    llvm::SmallVector<mlir::Attribute> deviceTypes;
+
+    // Collect the 'existing' device-type attributes so we can re-create them
+    // and insert them.
+    if (existingDeviceTypes) {
+      for (const mlir::Attribute &Attr : existingDeviceTypes)
+        deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
+            builder.getContext(),
+            cast<mlir::acc::DeviceTypeAttr>(Attr).getValue()));
+    }
+
+    // Insert 1 version of the 'expr' to the NumWorkers list per-current
+    // device type.
+    if (lastDeviceTypeClause) {
+      for (const DeviceTypeArgument &arch :
+           lastDeviceTypeClause->getArchitectures()) {
+        deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
+            builder.getContext(), decodeDeviceType(arch.getIdentifierInfo())));
+        argCollection.append(argument);
+      }
+    } else {
+      // Else, we just add a single for 'none'.
+      deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
+          builder.getContext(), mlir::acc::DeviceType::None));
+      argCollection.append(argument);
+    }
+
+    return mlir::ArrayAttr::get(builder.getContext(), deviceTypes);
+  }
+
 public:
   OpenACCClauseCIREmitter(OpTy &operation, CIRGenFunction &cgf,
                           CIRGenBuilderTy &builder,
@@ -112,19 +162,6 @@ class OpenACCClauseCIREmitter final
     }
   }
 
-  mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
-    // '*' case leaves no identifier-info, just a nullptr.
-    if (!ii)
-      return mlir::acc::DeviceType::Star;
-    return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName())
-        .CaseLower("default", mlir::acc::DeviceType::Default)
-        .CaseLower("host", mlir::acc::DeviceType::Host)
-        .CaseLower("multicore", mlir::acc::DeviceType::Multicore)
-        .CasesLower("nvidia", "acc_device_nvidia",
-                    mlir::acc::DeviceType::Nvidia)
-        .CaseLower("radeon", mlir::acc::DeviceType::Radeon);
-  }
-
   void VisitDeviceTypeClause(const OpenACCDeviceTypeClause &clause) {
     lastDeviceTypeClause = &clause;
     if constexpr (isOneOfTypes<OpTy, InitOp, ShutdownOp>) {
@@ -165,38 +202,10 @@ class OpenACCClauseCIREmitter final
 
   void VisitNumWorkersClause(const OpenACCNumWorkersClause &clause) {
     if constexpr (isOneOfTypes<OpTy, ParallelOp, KernelsOp>) {
-      // Collect the 'existing' device-type attributes so we can re-create them
-      // and insert them.
-      llvm::SmallVector<mlir::Attribute> deviceTypes;
-      mlir::ArrayAttr existingDeviceTypes =
-          operation.getNumWorkersDeviceTypeAttr();
-
-      if (existingDeviceTypes) {
-        for (mlir::Attribute Attr : existingDeviceTypes)
-          deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
-              builder.getContext(),
-              cast<mlir::acc::DeviceTypeAttr>(Attr).getValue()));
-      }
-
-      // Insert 1 version of the 'int-expr' to the NumWorkers list per-current
-      // device type.
-      mlir::Value intExpr = createIntExpr(clause.getIntExpr());
-      if (lastDeviceTypeClause) {
-        for (const DeviceTypeArgument &arg :
-             lastDeviceTypeClause->getArchitectures()) {
-          deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
-              builder.getContext(), decodeDeviceType(arg.getIdentifierInfo())));
-          operation.getNumWorkersMutable().append(intExpr);
-        }
-      } else {
-        // Else, we just add a single for 'none'.
-        deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
-            builder.getContext(), mlir::acc::DeviceType::None));
-        operation.getNumWorkersMutable().append(intExpr);
-      }
-
-      operation.setNumWorkersDeviceTypeAttr(
-          mlir::ArrayAttr::get(builder.getContext(), deviceTypes));
+      mlir::MutableOperandRange range = operation.getNumWorkersMutable();
+      operation.setNumWorkersDeviceTypeAttr(handleDeviceTypeAffectedClause(
+          operation.getNumWorkersDeviceTypeAttr(),
+          createIntExpr(clause.getIntExpr()), range));
     } else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
       llvm_unreachable("num_workers not valid on serial");
     } else {
@@ -204,6 +213,19 @@ class OpenACCClauseCIREmitter final
     }
   }
 
+  void VisitVectorLengthClause(const OpenACCVectorLengthClause &clause) {
+    if constexpr (isOneOfTypes<OpTy, ParallelOp, KernelsOp>) {
+      mlir::MutableOperandRange range = operation.getVectorLengthMutable();
+      operation.setVectorLengthDeviceTypeAttr(handleDeviceTypeAffectedClause(
+          operation.getVectorLengthDeviceTypeAttr(),
+          createIntExpr(clause.getIntExpr()), range));
+    } else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
+      llvm_unreachable("vector_length not valid on serial");
+    } else {
+      return clauseNotImplemented(clause);
+    }
+  }
+
   void VisitSelfClause(const OpenACCSelfClause &clause) {
     if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) {
       if (clause.isEmptySelfClause()) {

diff  --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c
index 6459b310546cd..d2da1d18f1534 100644
--- a/clang/test/CIR/CodeGenOpenACC/kernels.c
+++ b/clang/test/CIR/CodeGenOpenACC/kernels.c
@@ -158,5 +158,57 @@ void acc_kernels(int cond) {
   // CHECK-NEXT: acc.terminator
   // CHECK-NEXT: } loc
 
+#pragma acc kernels vector_length(cond)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2u)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
+  // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+  // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(3)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+  // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>], %[[THREE_CAST]] : si32 [#acc.device_type<multicore>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
+#pragma acc kernels device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3)
+  {}
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+  // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.kernels vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.terminator
+  // CHECK-NEXT: } loc
+
   // CHECK-NEXT: cir.return
 }

diff  --git a/clang/test/CIR/CodeGenOpenACC/parallel.c b/clang/test/CIR/CodeGenOpenACC/parallel.c
index bdb506ee7e1d2..61dccc591c252 100644
--- a/clang/test/CIR/CodeGenOpenACC/parallel.c
+++ b/clang/test/CIR/CodeGenOpenACC/parallel.c
@@ -157,5 +157,57 @@ void acc_parallel(int cond) {
   // CHECK-NEXT: acc.yield
   // CHECK-NEXT: } loc
 
+#pragma acc parallel vector_length(cond)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel vector_length(cond) device_type(nvidia) vector_length(2u)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
+  // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+  // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(4)
+  {}
+  // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i
+  // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[FOUR_CAST]] : si32 [#acc.device_type<radeon>], %[[FOUR_CAST]] : si32 [#acc.device_type<multicore>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3)
+  {}
+  // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+  // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+  // CHECK-NEXT: acc.parallel vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
   // CHECK-NEXT: cir.return
 }