[flang-commits] [flang] f674ddc - [flang] CUDA Fortran - part 5/5: statement semantics

Peter Klausler via flang-commits flang-commits at lists.llvm.org
Thu Jun 1 13:31:48 PDT 2023


Author: Peter Klausler
Date: 2023-06-01T13:31:35-07:00
New Revision: f674ddc19fee67401b78f360e530bb064aa31ed8

URL: https://github.com/llvm/llvm-project/commit/f674ddc19fee67401b78f360e530bb064aa31ed8
DIFF: https://github.com/llvm/llvm-project/commit/f674ddc19fee67401b78f360e530bb064aa31ed8.diff

LOG: [flang] CUDA Fortran - part 5/5: statement semantics

Canonicalize !$CUF KERNEL DO loop nests, similar to OpenACC/OpenMP
canonicalization.  Check statements and expressions in device contexts
for usage that isn't supported.  Add more tests, and include some
tweaks to standard modules needed to build CUDA Fortran modules.

Depends on https://reviews.llvm.org/D150159,
https://reviews.llvm.org/D150161, https://reviews.llvm.org/D150162, &
https://reviews.llvm.org/D150163.

Differential Revision: https://reviews.llvm.org/D150164

Added: 
    flang/lib/Semantics/check-cuda.cpp
    flang/lib/Semantics/check-cuda.h
    flang/test/Semantics/cuf04.cuf
    flang/test/Semantics/cuf05.cuf
    flang/test/Semantics/cuf06.cuf
    flang/test/Semantics/cuf09.cuf

Modified: 
    flang/include/flang/Evaluate/traverse.h
    flang/include/flang/Parser/tools.h
    flang/include/flang/Semantics/semantics.h
    flang/lib/Parser/unparse.cpp
    flang/lib/Semantics/CMakeLists.txt
    flang/lib/Semantics/canonicalize-acc.cpp
    flang/lib/Semantics/check-allocate.cpp
    flang/lib/Semantics/check-deallocate.cpp
    flang/lib/Semantics/resolve-names.cpp
    flang/lib/Semantics/semantics.cpp
    flang/module/__fortran_builtins.f90
    flang/module/iso_c_binding.f90
    flang/test/Driver/compiler_options.f90
    flang/test/Parser/cuf-sanity-unparse.CUF
    flang/tools/f18/CMakeLists.txt
    flang/unittests/Runtime/Time.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h
index 79cef799c8b31..54cdb690ec088 100644
--- a/flang/include/flang/Evaluate/traverse.h
+++ b/flang/include/flang/Evaluate/traverse.h
@@ -38,6 +38,7 @@
 // expression of an ASSOCIATE (or related) construct entity.
 
 #include "expression.h"
+#include "flang/Common/indirection.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/type.h"
 #include <set>
@@ -53,6 +54,10 @@ template <typename Visitor, typename Result> class Traverse {
   Result operator()(const common::Indirection<A, C> &x) const {
     return visitor_(x.value());
   }
+  template <typename A>
+  Result operator()(const common::ForwardOwningPointer<A> &p) const {
+    return visitor_(p.get());
+  }
   template <typename _> Result operator()(const SymbolRef x) const {
     return visitor_(*x);
   }
@@ -76,13 +81,17 @@ template <typename Visitor, typename Result> class Traverse {
       return visitor_.Default();
     }
   }
-  template <typename... A>
-  Result operator()(const std::variant<A...> &u) const {
-    return common::visit(visitor_, u);
+  template <typename... As>
+  Result operator()(const std::variant<As...> &u) const {
+    return common::visit([=](const auto &y) { return visitor_(y); }, u);
   }
   template <typename A> Result operator()(const std::vector<A> &x) const {
     return CombineContents(x);
   }
+  template <typename A, typename B>
+  Result operator()(const std::pair<A, B> &x) const {
+    return Combine(x.first, x.second);
+  }
 
   // Leaves
   Result operator()(const BOZLiteralConstant &) const {
@@ -233,14 +242,24 @@ template <typename Visitor, typename Result> class Traverse {
   template <typename T> Result operator()(const Expr<T> &x) const {
     return visitor_(x.u);
   }
+  Result operator()(const Assignment &x) const {
+    return Combine(x.lhs, x.rhs, x.u);
+  }
+  Result operator()(const Assignment::Intrinsic &) const {
+    return visitor_.Default();
+  }
+  Result operator()(const GenericExprWrapper &x) const { return visitor_(x.v); }
+  Result operator()(const GenericAssignmentWrapper &x) const {
+    return visitor_(x.v);
+  }
 
 private:
   template <typename ITER> Result CombineRange(ITER iter, ITER end) const {
     if (iter == end) {
       return visitor_.Default();
     } else {
-      Result result{visitor_(*iter++)};
-      for (; iter != end; ++iter) {
+      Result result{visitor_(*iter)};
+      for (++iter; iter != end; ++iter) {
         result = visitor_.Combine(std::move(result), visitor_(*iter));
       }
       return result;

diff  --git a/flang/include/flang/Parser/tools.h b/flang/include/flang/Parser/tools.h
index 48c6ab56dd9fd..1e347fab6461a 100644
--- a/flang/include/flang/Parser/tools.h
+++ b/flang/include/flang/Parser/tools.h
@@ -65,6 +65,18 @@ struct UnwrapperHelper {
     return common::visit([](const auto &y) { return Unwrap<A>(y); }, x);
   }
 
+  template <typename A, std::size_t J = 0, typename... Bs>
+  static const A *Unwrap(const std::tuple<Bs...> &x) {
+    if constexpr (J < sizeof...(Bs)) {
+      if (auto result{Unwrap<A>(std::get<J>(x))}) {
+        return result;
+      }
+      return Unwrap<A, (J + 1)>(x);
+    } else {
+      return nullptr;
+    }
+  }
+
   template <typename A, typename B>
   static const A *Unwrap(const std::optional<B> &o) {
     if (o) {
@@ -122,5 +134,120 @@ template <typename A, typename = int> struct HasTypedExpr : std::false_type {};
 template <typename A>
 struct HasTypedExpr<A, decltype(static_cast<void>(A::typedExpr), 0)>
     : std::true_type {};
+
+// GetSource()
+
+template <bool GET_FIRST> struct GetSourceHelper {
+
+  using Result = std::optional<CharBlock>;
+
+  template <typename A> static Result GetSource(A *p) {
+    if (p) {
+      return GetSource(*p);
+    } else {
+      return std::nullopt;
+    }
+  }
+  template <typename A>
+  static Result GetSource(const common::Indirection<A> &x) {
+    return GetSource(x.value());
+  }
+
+  template <typename A, bool COPY>
+  static Result GetSource(const common::Indirection<A, COPY> &x) {
+    return GetSource(x.value());
+  }
+
+  template <typename... As>
+  static Result GetSource(const std::variant<As...> &x) {
+    return common::visit([](const auto &y) { return GetSource(y); }, x);
+  }
+
+  template <std::size_t J = 0, typename... As>
+  static Result GetSource(const std::tuple<As...> &x) {
+    if constexpr (J < sizeof...(As)) {
+      constexpr std::size_t index{GET_FIRST ? J : sizeof...(As) - J - 1};
+      if (auto result{GetSource(std::get<index>(x))}) {
+        return result;
+      }
+      return GetSource<(J + 1)>(x);
+    } else {
+      return {};
+    }
+  }
+
+  template <typename A> static Result GetSource(const std::optional<A> &o) {
+    if (o) {
+      return GetSource(*o);
+    } else {
+      return {};
+    }
+  }
+
+  template <typename A> static Result GetSource(const std::list<A> &x) {
+    if constexpr (GET_FIRST) {
+      for (const A &y : x) {
+        if (auto result{GetSource(y)}) {
+          return result;
+        }
+      }
+    } else {
+      for (auto iter{x.rbegin()}; iter != x.rend(); ++iter) {
+        if (auto result{GetSource(*iter)}) {
+          return result;
+        }
+      }
+    }
+    return {};
+  }
+
+  template <typename A> static Result GetSource(const std::vector<A> &x) {
+    if constexpr (GET_FIRST) {
+      for (const A &y : x) {
+        if (auto result{GetSource(y)}) {
+          return result;
+        }
+      }
+    } else {
+      for (auto iter{x.rbegin()}; iter != x.rend(); ++iter) {
+        if (auto result{GetSource(*iter)}) {
+          return result;
+        }
+      }
+    }
+    return {};
+  }
+
+  template <typename A> static Result GetSource(A &x) {
+    if constexpr (HasSource<A>::value) {
+      return x.source;
+    } else if constexpr (ConstraintTrait<A>) {
+      return GetSource(x.thing);
+    } else if constexpr (WrapperTrait<A>) {
+      return GetSource(x.v);
+    } else if constexpr (UnionTrait<A>) {
+      return GetSource(x.u);
+    } else if constexpr (TupleTrait<A>) {
+      return GetSource(x.t);
+    } else {
+      return {};
+    }
+  }
+};
+
+template <typename A> std::optional<CharBlock> GetSource(const A &x) {
+  return GetSourceHelper<true>::GetSource(x);
+}
+template <typename A> std::optional<CharBlock> GetSource(A &x) {
+  return GetSourceHelper<true>::GetSource(const_cast<const A &>(x));
+}
+
+template <typename A> std::optional<CharBlock> GetLastSource(const A &x) {
+  return GetSourceHelper<false>::GetSource(x);
+}
+template <typename A> std::optional<CharBlock> GetLastSource(A &x) {
+  return GetSourceHelper<false>::GetSource(const_cast<const A &>(x));
+}
+
 } // namespace Fortran::parser
 #endif // FORTRAN_PARSER_TOOLS_H_

diff  --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index f3846c594dc23..48f37b7d4ce92 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -214,8 +214,9 @@ class SemanticsContext {
   // Defines builtinsScope_ from the __Fortran_builtins module
   void UseFortranBuiltinsModule();
   const Scope *GetBuiltinsScope() const { return builtinsScope_; }
+
   void UsePPCFortranBuiltinTypesModule();
-  const Scope *GetCUDABuiltinsScope();
+  const Scope &GetCUDABuiltinsScope();
   void UsePPCFortranBuiltinsModule();
   Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
   const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -281,7 +282,7 @@ class SemanticsContext {
   std::set<std::string> tempNames_;
   const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
   Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
-  std::optional<const Scope *> CUDABuiltinsScope_; // module __CUDA_builtins
+  std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
   const Scope *ppcBuiltinsScope_{nullptr}; // module __Fortran_PPC_intrinsics
   std::list<parser::Program> modFileParseTrees_;
   std::unique_ptr<CommonBlockMap> commonBlockMap_;

diff  --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 304ff96c66620..ff716236b77e6 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -1698,7 +1698,7 @@ class UnparseVisitor {
     Put('('), Walk(std::get<std::list<ActualArgSpec>>(x.v.t), ", "), Put(')');
   }
   void Unparse(const CallStmt &x) { // R1521
-    if (asFortran_ && x.typedCall.get() && !x.chevrons /*CUDA todo*/) {
+    if (asFortran_ && x.typedCall.get()) {
       Put(' ');
       asFortran_->call(out_, *x.typedCall);
       Put('\n');

diff  --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt
index e8022e1261c8e..bfd2f2b979044 100644
--- a/flang/lib/Semantics/CMakeLists.txt
+++ b/flang/lib/Semantics/CMakeLists.txt
@@ -10,6 +10,7 @@ add_flang_library(FortranSemantics
   check-call.cpp
   check-case.cpp
   check-coarray.cpp
+  check-cuda.cpp
   check-data.cpp
   check-deallocate.cpp
   check-declarations.cpp

diff  --git a/flang/lib/Semantics/canonicalize-acc.cpp b/flang/lib/Semantics/canonicalize-acc.cpp
index 5afae172cfaa2..c188450a92d46 100644
--- a/flang/lib/Semantics/canonicalize-acc.cpp
+++ b/flang/lib/Semantics/canonicalize-acc.cpp
@@ -65,7 +65,7 @@ class CanonicalizationOfAcc {
 
         const auto &outer{std::get<std::optional<parser::DoConstruct>>(x.t)};
         if (outer->IsDoConcurrent()) {
-          return; // Tile is not allowed on DO CONURRENT
+          return; // Tile is not allowed on DO CONCURRENT
         }
         for (const parser::DoConstruct *loop{&*outer}; loop && tileArgNb > 0;
              --tileArgNb) {

diff  --git a/flang/lib/Semantics/check-allocate.cpp b/flang/lib/Semantics/check-allocate.cpp
index 00ee0d0954f91..ece66444ffb45 100644
--- a/flang/lib/Semantics/check-allocate.cpp
+++ b/flang/lib/Semantics/check-allocate.cpp
@@ -31,6 +31,8 @@ struct AllocateCheckerInfo {
   bool gotTypeSpec{false};
   bool gotSource{false};
   bool gotMold{false};
+  bool gotStream{false};
+  bool gotPinned{false};
 };
 
 class AllocationCheckerHelper {
@@ -179,8 +181,22 @@ static std::optional<AllocateCheckerInfo> CheckAllocateOptions(
               parserSourceExpr = &mold.v.value();
               info.gotMold = true;
             },
-            [](const parser::AllocOpt::Stream &) { /* CUDA coming */ },
-            [](const parser::AllocOpt::Pinned &) { /* CUDA coming */ },
+            [&](const parser::AllocOpt::Stream &stream) { // CUDA
+              if (info.gotStream) {
+                context.Say(
+                    "STREAM may not be duplicated in a ALLOCATE statement"_err_en_US);
+                stopCheckingAllocate = true;
+              }
+              info.gotStream = true;
+            },
+            [&](const parser::AllocOpt::Pinned &pinned) { // CUDA
+              if (info.gotPinned) {
+                context.Say(
+                    "PINNED may not be duplicated in a ALLOCATE statement"_err_en_US);
+                stopCheckingAllocate = true;
+              }
+              info.gotPinned = true;
+            },
         },
         allocOpt.u);
   }
@@ -569,12 +585,13 @@ bool AllocationCheckerHelper::RunChecks(SemanticsContext &context) {
     return false;
   }
   context.CheckIndexVarRedefine(name_);
+  const Scope &subpScope{
+      GetProgramUnitContaining(context.FindScope(name_.source))};
   if (allocateObject_.typedExpr && allocateObject_.typedExpr->v) {
-    if (auto whyNot{
-            WhyNotDefinable(name_.source, context.FindScope(name_.source),
-                {DefinabilityFlag::PointerDefinition,
-                    DefinabilityFlag::AcceptAllocatable},
-                *allocateObject_.typedExpr->v)}) {
+    if (auto whyNot{WhyNotDefinable(name_.source, subpScope,
+            {DefinabilityFlag::PointerDefinition,
+                DefinabilityFlag::AcceptAllocatable},
+            *allocateObject_.typedExpr->v)}) {
       context
           .Say(name_.source,
               "Name in ALLOCATE statement is not definable"_err_en_US)

diff  --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
new file mode 100644
index 0000000000000..c0c6ff4c1a2ba
--- /dev/null
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -0,0 +1,416 @@
+//===-- lib/Semantics/check-cuda.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "check-cuda.h"
+#include "flang/Common/template.h"
+#include "flang/Evaluate/fold.h"
+#include "flang/Evaluate/traverse.h"
+#include "flang/Parser/parse-tree-visitor.h"
+#include "flang/Parser/parse-tree.h"
+#include "flang/Parser/tools.h"
+#include "flang/Semantics/expression.h"
+#include "flang/Semantics/symbol.h"
+
+// Once labeled DO constructs have been canonicalized and their parse subtrees
+// transformed into parser::DoConstructs, scan the parser::Blocks of the program
+// and merge adjacent CUFKernelDoConstructs and DoConstructs whenever the
+// CUFKernelDoConstruct doesn't already have an embedded DoConstruct.  Also
+// emit errors about improper or missing DoConstructs.
+
+namespace Fortran::parser {
+struct Mutator {
+  template <typename A> bool Pre(A &) { return true; }
+  template <typename A> void Post(A &) {}
+  bool Pre(Block &);
+};
+
+bool Mutator::Pre(Block &block) {
+  for (auto iter{block.begin()}; iter != block.end(); ++iter) {
+    if (auto *kernel{Unwrap<CUFKernelDoConstruct>(*iter)}) {
+      auto &nested{std::get<std::optional<DoConstruct>>(kernel->t)};
+      if (!nested) {
+        if (auto next{iter}; ++next != block.end()) {
+          if (auto *doConstruct{Unwrap<DoConstruct>(*next)}) {
+            nested = std::move(*doConstruct);
+            block.erase(next);
+          }
+        }
+      }
+    } else {
+      Walk(*iter, *this);
+    }
+  }
+  return false;
+}
+} // namespace Fortran::parser
+
+namespace Fortran::semantics {
+
+bool CanonicalizeCUDA(parser::Program &program) {
+  parser::Mutator mutator;
+  parser::Walk(program, mutator);
+  return true;
+}
+
+using MaybeMsg = std::optional<parser::MessageFormattedText>;
+
+// Traverses an evaluate::Expr<> in search of unsupported operations
+// on the device.
+
+struct DeviceExprChecker
+    : public evaluate::AnyTraverse<DeviceExprChecker, MaybeMsg> {
+  using Result = MaybeMsg;
+  using Base = evaluate::AnyTraverse<DeviceExprChecker, Result>;
+  DeviceExprChecker() : Base(*this) {}
+  using Base::operator();
+  Result operator()(const evaluate::ProcedureDesignator &x) const {
+    if (const Symbol * sym{x.GetInterfaceSymbol()}) {
+      const auto *subp{
+          sym->GetUltimate().detailsIf<semantics::SubprogramDetails>()};
+      if (subp) {
+        if (auto attrs{subp->cudaSubprogramAttrs()}) {
+          if (*attrs == common::CUDASubprogramAttrs::HostDevice ||
+              *attrs == common::CUDASubprogramAttrs::Device) {
+            return {};
+          }
+        }
+      }
+    } else if (x.GetSpecificIntrinsic()) {
+      // TODO(CUDA): Check for unsupported intrinsics here
+      return {};
+    }
+    return parser::MessageFormattedText(
+        "'%s' may not be called in device code"_err_en_US, x.GetName());
+  }
+};
+
+template <typename A> static MaybeMsg CheckUnwrappedExpr(const A &x) {
+  if (const auto *expr{parser::Unwrap<parser::Expr>(x)}) {
+    return DeviceExprChecker{}(expr->typedExpr);
+  }
+  return {};
+}
+
+template <typename A>
+static void CheckUnwrappedExpr(
+    SemanticsContext &context, SourceName at, const A &x) {
+  if (const auto *expr{parser::Unwrap<parser::Expr>(x)}) {
+    if (auto msg{DeviceExprChecker{}(expr->typedExpr)}) {
+      context.Say(at, std::move(*msg));
+    }
+  }
+}
+
+template <bool CUF_KERNEL> struct ActionStmtChecker {
+  template <typename A> static MaybeMsg WhyNotOk(const A &x) {
+    if constexpr (ConstraintTrait<A>) {
+      return WhyNotOk(x.thing);
+    } else if constexpr (WrapperTrait<A>) {
+      return WhyNotOk(x.v);
+    } else if constexpr (UnionTrait<A>) {
+      return WhyNotOk(x.u);
+    } else if constexpr (TupleTrait<A>) {
+      return WhyNotOk(x.t);
+    } else {
+      return parser::MessageFormattedText{
+          "Statement may not appear in device code"_err_en_US};
+    }
+  }
+  template <typename A>
+  static MaybeMsg WhyNotOk(const common::Indirection<A> &x) {
+    return WhyNotOk(x.value());
+  }
+  template <typename... As>
+  static MaybeMsg WhyNotOk(const std::variant<As...> &x) {
+    return common::visit([](const auto &x) { return WhyNotOk(x); }, x);
+  }
+  template <std::size_t J = 0, typename... As>
+  static MaybeMsg WhyNotOk(const std::tuple<As...> &x) {
+    if constexpr (J == sizeof...(As)) {
+      return {};
+    } else if (auto msg{WhyNotOk(std::get<J>(x))}) {
+      return msg;
+    } else {
+      return WhyNotOk<(J + 1)>(x);
+    }
+  }
+  template <typename A> static MaybeMsg WhyNotOk(const std::list<A> &x) {
+    for (const auto &y : x) {
+      if (MaybeMsg result{WhyNotOk(y)}) {
+        return result;
+      }
+    }
+    return {};
+  }
+  template <typename A> static MaybeMsg WhyNotOk(const std::optional<A> &x) {
+    if (x) {
+      return WhyNotOk(*x);
+    } else {
+      return {};
+    }
+  }
+  template <typename A>
+  static MaybeMsg WhyNotOk(const parser::UnlabeledStatement<A> &x) {
+    return WhyNotOk(x.statement);
+  }
+  template <typename A>
+  static MaybeMsg WhyNotOk(const parser::Statement<A> &x) {
+    return WhyNotOk(x.statement);
+  }
+  static MaybeMsg WhyNotOk(const parser::AllocateStmt &) {
+    return {}; // AllocateObjects are checked elsewhere
+  }
+  static MaybeMsg WhyNotOk(const parser::AllocateCoarraySpec &) {
+    return parser::MessageFormattedText(
+        "A coarray may not be allocated on the device"_err_en_US);
+  }
+  static MaybeMsg WhyNotOk(const parser::DeallocateStmt &) {
+    return {}; // AllocateObjects are checked elsewhere
+  }
+  static MaybeMsg WhyNotOk(const parser::AssignmentStmt &x) {
+    return DeviceExprChecker{}(x.typedAssignment);
+  }
+  static MaybeMsg WhyNotOk(const parser::CallStmt &x) {
+    return DeviceExprChecker{}(x.typedCall);
+  }
+  static MaybeMsg WhyNotOk(const parser::ContinueStmt &) { return {}; }
+  static MaybeMsg WhyNotOk(const parser::IfStmt &x) {
+    if (auto result{
+            CheckUnwrappedExpr(std::get<parser::ScalarLogicalExpr>(x.t))}) {
+      return result;
+    }
+    return WhyNotOk(
+        std::get<parser::UnlabeledStatement<parser::ActionStmt>>(x.t)
+            .statement);
+  }
+  static MaybeMsg WhyNotOk(const parser::NullifyStmt &x) {
+    for (const auto &y : x.v) {
+      if (MaybeMsg result{DeviceExprChecker{}(y.typedExpr)}) {
+        return result;
+      }
+    }
+    return {};
+  }
+  static MaybeMsg WhyNotOk(const parser::PointerAssignmentStmt &x) {
+    return DeviceExprChecker{}(x.typedAssignment);
+  }
+};
+
+template <bool IsCUFKernelDo> class DeviceContextChecker {
+public:
+  explicit DeviceContextChecker(SemanticsContext &c) : context_{c} {}
+  void CheckSubprogram(const parser::Name &name, const parser::Block &body) {
+    if (name.symbol) {
+      const auto *subp{
+          name.symbol->GetUltimate().detailsIf<SubprogramDetails>()};
+      if (subp && subp->moduleInterface()) {
+        subp = subp->moduleInterface()
+                   ->GetUltimate()
+                   .detailsIf<SubprogramDetails>();
+      }
+      if (subp &&
+          subp->cudaSubprogramAttrs().value_or(
+              common::CUDASubprogramAttrs::Host) !=
+              common::CUDASubprogramAttrs::Host) {
+        Check(body);
+      }
+    }
+  }
+  void Check(const parser::Block &block) {
+    for (const auto &epc : block) {
+      Check(epc);
+    }
+  }
+
+private:
+  void Check(const parser::ExecutionPartConstruct &epc) {
+    common::visit(
+        common::visitors{
+            [&](const parser::ExecutableConstruct &x) { Check(x); },
+            [&](const parser::Statement<common::Indirection<parser::EntryStmt>>
+                    &x) {
+              context_.Say(x.source,
+                  "Device code may not contain an ENTRY statement"_err_en_US);
+            },
+            [](const parser::Statement<common::Indirection<parser::FormatStmt>>
+                    &) {},
+            [](const parser::Statement<common::Indirection<parser::DataStmt>>
+                    &) {},
+            [](const parser::Statement<
+                common::Indirection<parser::NamelistStmt>> &) {},
+            [](const parser::ErrorRecovery &) {},
+        },
+        epc.u);
+  }
+  void Check(const parser::ExecutableConstruct &ec) {
+    common::visit(
+        common::visitors{
+            [&](const parser::Statement<parser::ActionStmt> &stmt) {
+              Check(stmt.statement, stmt.source);
+            },
+            [&](const common::Indirection<parser::DoConstruct> &x) {
+              if (const std::optional<parser::LoopControl> &control{
+                      x.value().GetLoopControl()}) {
+                common::visit([&](const auto &y) { Check(y); }, control->u);
+              }
+              Check(std::get<parser::Block>(x.value().t));
+            },
+            [&](const common::Indirection<parser::BlockConstruct> &x) {
+              Check(std::get<parser::Block>(x.value().t));
+            },
+            [&](const common::Indirection<parser::IfConstruct> &x) {
+              Check(x.value());
+            },
+            [&](const auto &x) {
+              if (auto source{parser::GetSource(x)}) {
+                context_.Say(*source,
+                    "Statement may not appear in device code"_err_en_US);
+              }
+            },
+        },
+        ec.u);
+  }
+  void Check(const parser::ActionStmt &stmt, const parser::CharBlock &source) {
+    common::visit(
+        common::visitors{
+            [&](const auto &x) {
+              if (auto msg{ActionStmtChecker<IsCUFKernelDo>::WhyNotOk(x)}) {
+                context_.Say(source, std::move(*msg));
+              }
+            },
+        },
+        stmt.u);
+  }
+  void Check(const parser::IfConstruct &ic) {
+    const auto &ifS{std::get<parser::Statement<parser::IfThenStmt>>(ic.t)};
+    CheckUnwrappedExpr(context_, ifS.source,
+        std::get<parser::ScalarLogicalExpr>(ifS.statement.t));
+    Check(std::get<parser::Block>(ic.t));
+    for (const auto &eib :
+        std::get<std::list<parser::IfConstruct::ElseIfBlock>>(ic.t)) {
+      const auto &eIfS{std::get<parser::Statement<parser::ElseIfStmt>>(eib.t)};
+      CheckUnwrappedExpr(context_, eIfS.source,
+          std::get<parser::ScalarLogicalExpr>(eIfS.statement.t));
+      Check(std::get<parser::Block>(eib.t));
+    }
+    if (const auto &eb{
+            std::get<std::optional<parser::IfConstruct::ElseBlock>>(ic.t)}) {
+      Check(std::get<parser::Block>(eb->t));
+    }
+  }
+  void Check(const parser::LoopControl::Bounds &bounds) {
+    Check(bounds.lower);
+    Check(bounds.upper);
+    if (bounds.step) {
+      Check(*bounds.step);
+    }
+  }
+  void Check(const parser::LoopControl::Concurrent &x) {
+    const auto &header{std::get<parser::ConcurrentHeader>(x.t)};
+    for (const auto &cc :
+        std::get<std::list<parser::ConcurrentControl>>(header.t)) {
+      Check(std::get<1>(cc.t));
+      Check(std::get<2>(cc.t));
+      if (const auto &step{
+              std::get<std::optional<parser::ScalarIntExpr>>(cc.t)}) {
+        Check(*step);
+      }
+    }
+    if (const auto &mask{
+            std::get<std::optional<parser::ScalarLogicalExpr>>(header.t)}) {
+      Check(*mask);
+    }
+  }
+  void Check(const parser::ScalarLogicalExpr &x) {
+    Check(DEREF(parser::Unwrap<parser::Expr>(x)));
+  }
+  void Check(const parser::ScalarIntExpr &x) {
+    Check(DEREF(parser::Unwrap<parser::Expr>(x)));
+  }
+  void Check(const parser::ScalarExpr &x) {
+    Check(DEREF(parser::Unwrap<parser::Expr>(x)));
+  }
+  void Check(const parser::Expr &expr) {
+    if (MaybeMsg msg{DeviceExprChecker{}(expr.typedExpr)}) {
+      context_.Say(expr.source, std::move(*msg));
+    }
+  }
+
+  SemanticsContext &context_;
+};
+
+void CUDAChecker::Enter(const parser::SubroutineSubprogram &x) {
+  DeviceContextChecker<false>{context_}.CheckSubprogram(
+      std::get<parser::Name>(
+          std::get<parser::Statement<parser::SubroutineStmt>>(x.t).statement.t),
+      std::get<parser::ExecutionPart>(x.t).v);
+}
+
+void CUDAChecker::Enter(const parser::FunctionSubprogram &x) {
+  DeviceContextChecker<false>{context_}.CheckSubprogram(
+      std::get<parser::Name>(
+          std::get<parser::Statement<parser::FunctionStmt>>(x.t).statement.t),
+      std::get<parser::ExecutionPart>(x.t).v);
+}
+
+void CUDAChecker::Enter(const parser::SeparateModuleSubprogram &x) {
+  DeviceContextChecker<false>{context_}.CheckSubprogram(
+      std::get<parser::Statement<parser::MpSubprogramStmt>>(x.t).statement.v,
+      std::get<parser::ExecutionPart>(x.t).v);
+}
+
+// !$CUF KERNEL DO semantic checks
+
+static int DoConstructTightNesting(
+    const parser::DoConstruct *doConstruct, const parser::Block *&innerBlock) {
+  if (!doConstruct || !doConstruct->IsDoNormal()) {
+    return 0;
+  }
+  innerBlock = &std::get<parser::Block>(doConstruct->t);
+  if (innerBlock->size() == 1) {
+    if (const auto *execConstruct{
+            std::get_if<parser::ExecutableConstruct>(&innerBlock->front().u)}) {
+      if (const auto *next{
+              std::get_if<common::Indirection<parser::DoConstruct>>(
+                  &execConstruct->u)}) {
+        return 1 + DoConstructTightNesting(&next->value(), innerBlock);
+      }
+    }
+  }
+  return 1;
+}
+
+void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) {
+  auto source{std::get<parser::CUFKernelDoConstruct::Directive>(x.t).source};
+  const auto &directive{std::get<parser::CUFKernelDoConstruct::Directive>(x.t)};
+  std::int64_t depth{1};
+  if (auto expr{AnalyzeExpr(context_,
+          std::get<std::optional<parser::ScalarIntConstantExpr>>(
+              directive.t))}) {
+    depth = evaluate::ToInt64(expr).value_or(0);
+    if (depth <= 0) {
+      context_.Say(source,
+          "!$CUF KERNEL DO (%jd): loop nesting depth must be positive"_err_en_US,
+          std::intmax_t{depth});
+      depth = 1;
+    }
+  }
+  const parser::DoConstruct *doConstruct{common::GetPtrFromOptional(
+      std::get<std::optional<parser::DoConstruct>>(x.t))};
+  const parser::Block *innerBlock{nullptr};
+  if (DoConstructTightNesting(doConstruct, innerBlock) < depth) {
+    context_.Say(source,
+        "!$CUF KERNEL DO (%jd) must be followed by a DO construct with tightly nested outer levels of counted DO loops"_err_en_US,
+        std::intmax_t{depth});
+  }
+  if (innerBlock) {
+    DeviceContextChecker<true>{context_}.Check(*innerBlock);
+  }
+}
+
+} // namespace Fortran::semantics

diff  --git a/flang/lib/Semantics/check-cuda.h b/flang/lib/Semantics/check-cuda.h
new file mode 100644
index 0000000000000..d863795f16a7c
--- /dev/null
+++ b/flang/lib/Semantics/check-cuda.h
@@ -0,0 +1,50 @@
+//===-- lib/Semantics/check-cuda.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_SEMANTICS_CHECK_CUDA_H_
+#define FORTRAN_SEMANTICS_CHECK_CUDA_H_
+
+#include "flang/Semantics/semantics.h"
+#include <list>
+
+namespace Fortran::parser {
+struct Program;
+class Messages;
+struct Name;
+class CharBlock;
+struct ExecutionPartConstruct;
+struct ExecutableConstruct;
+struct ActionStmt;
+struct IfConstruct;
+struct CUFKernelDoConstruct;
+struct SubroutineSubprogram;
+struct FunctionSubprogram;
+struct SeparateModuleSubprogram;
+} // namespace Fortran::parser
+
+namespace Fortran::semantics {
+
+class SemanticsContext;
+
+class CUDAChecker : public virtual BaseChecker {
+public:
+  explicit CUDAChecker(SemanticsContext &c) : context_{c} {}
+  void Enter(const parser::SubroutineSubprogram &);
+  void Enter(const parser::FunctionSubprogram &);
+  void Enter(const parser::SeparateModuleSubprogram &);
+  void Enter(const parser::CUFKernelDoConstruct &);
+
+private:
+  SemanticsContext &context_;
+};
+
+bool CanonicalizeCUDA(parser::Program &);
+
+} // namespace Fortran::semantics
+
+#endif // FORTRAN_SEMANTICS_CHECK_CUDA_H_

diff  --git a/flang/lib/Semantics/check-deallocate.cpp b/flang/lib/Semantics/check-deallocate.cpp
index 085dbbf67d2c0..2bdded9018f42 100644
--- a/flang/lib/Semantics/check-deallocate.cpp
+++ b/flang/lib/Semantics/check-deallocate.cpp
@@ -19,10 +19,13 @@ namespace Fortran::semantics {
 void DeallocateChecker::Leave(const parser::DeallocateStmt &deallocateStmt) {
   for (const parser::AllocateObject &allocateObject :
       std::get<std::list<parser::AllocateObject>>(deallocateStmt.t)) {
+    parser::CharBlock source;
+    const Symbol *symbol{nullptr};
     common::visit(
         common::visitors{
             [&](const parser::Name &name) {
-              auto const *symbol{name.symbol};
+              source = name.source;
+              symbol = name.symbol;
               if (context_.HasError(symbol)) {
                 // already reported an error
               } else if (!IsVariableName(*symbol)) {
@@ -58,9 +61,10 @@ void DeallocateChecker::Leave(const parser::DeallocateStmt &deallocateStmt) {
             [&](const parser::StructureComponent &structureComponent) {
               // Only perform structureComponent checks if it was successfully
               // analyzed by expression analysis.
+              source = structureComponent.component.source;
+              symbol = structureComponent.component.symbol;
               if (const auto *expr{GetExpr(context_, allocateObject)}) {
-                if (const Symbol *symbol{structureComponent.component.symbol}) {
-                  auto source{structureComponent.component.source};
+                if (symbol) {
                   if (!IsAllocatableOrPointer(*symbol)) { // C932
                     context_.Say(source,
                         "Component in DEALLOCATE statement must have the ALLOCATABLE or POINTER attribute"_err_en_US);

diff  --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 6b503f666af60..0df8a5765124b 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -2740,7 +2740,7 @@ void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol,
         Say(source,
             "'%s' already has another CUDA data attribute ('%s')"_err_en_US,
             symbol.name(),
-            common::EnumToString(*object->cudaDataAttr()).substr());
+            std::string{common::EnumToString(*object->cudaDataAttr())}.c_str());
       } else {
         object->set_cudaDataAttr(attr);
       }
@@ -7700,13 +7700,11 @@ bool ResolveNamesVisitor::Pre(const parser::SpecificationPart &x) {
 
 void ResolveNamesVisitor::UseCUDABuiltinNames() {
   if (FindCUDADeviceContext(&currScope())) {
-    if (const Scope * CUDABuiltins{context().GetCUDABuiltinsScope()}) {
-      for (const auto &[name, symbol] : *CUDABuiltins) {
-        if (!FindInScope(name)) {
-          auto &localSymbol{MakeSymbol(name)};
-          localSymbol.set_details(UseDetails{name, *symbol});
-          localSymbol.flags() = symbol->flags();
-        }
+    for (const auto &[name, symbol] : context().GetCUDABuiltinsScope()) {
+      if (!FindInScope(name)) {
+        auto &localSymbol{MakeSymbol(name)};
+        localSymbol.set_details(UseDetails{name, *symbol});
+        localSymbol.flags() = symbol->flags();
       }
     }
   }

diff  --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index b70cd745ebd8a..7f2f64ecb1bd1 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -16,6 +16,7 @@
 #include "check-arithmeticif.h"
 #include "check-case.h"
 #include "check-coarray.h"
+#include "check-cuda.h"
 #include "check-data.h"
 #include "check-deallocate.h"
 #include "check-declarations.h"
@@ -69,12 +70,13 @@ static void GetSymbolNames(const Scope &scope, NameToSymbolMap &symbols) {
 // children are visited, Leave is called after. No two checkers may have the
 // same Enter or Leave function. Each checker must be constructible from
 // SemanticsContext and have BaseChecker as a virtual base class.
-template <typename... C> class SemanticsVisitor : public virtual C... {
+template <typename... C>
+class SemanticsVisitor : public virtual BaseChecker, public virtual C... {
 public:
-  using C::Enter...;
-  using C::Leave...;
   using BaseChecker::Enter;
   using BaseChecker::Leave;
+  using C::Enter...;
+  using C::Leave...;
   SemanticsVisitor(SemanticsContext &context)
       : C{context}..., context_{context} {}
 
@@ -158,12 +160,14 @@ class MiscChecker : public virtual BaseChecker {
 };
 
 using StatementSemanticsPass1 = ExprChecker;
-using StatementSemanticsPass2 = SemanticsVisitor<AccStructureChecker,
-    AllocateChecker, ArithmeticIfStmtChecker, AssignmentChecker, CaseChecker,
-    CoarrayChecker, DataChecker, DeallocateChecker, DoForallChecker,
-    IfStmtChecker, IoChecker, MiscChecker, NamelistChecker, NullifyChecker,
-    OmpStructureChecker, PurityChecker, ReturnStmtChecker,
-    SelectRankConstructChecker, SelectTypeChecker, StopChecker>;
+using StatementSemanticsPass2 = SemanticsVisitor<AllocateChecker,
+    ArithmeticIfStmtChecker, AssignmentChecker, CaseChecker, CoarrayChecker,
+    DataChecker, DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker,
+    MiscChecker, NamelistChecker, NullifyChecker, PurityChecker,
+    ReturnStmtChecker, SelectRankConstructChecker, SelectTypeChecker,
+    StopChecker>;
+using StatementSemanticsPass3 =
+    SemanticsVisitor<AccStructureChecker, OmpStructureChecker, CUDAChecker>;
 
 static bool PerformStatementSemantics(
     SemanticsContext &context, parser::Program &program) {
@@ -174,6 +178,11 @@ static bool PerformStatementSemantics(
   StatementSemanticsPass1{context}.Walk(program);
   StatementSemanticsPass2 pass2{context};
   pass2.Walk(program);
+  if (context.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC) ||
+      context.languageFeatures().IsEnabled(common::LanguageFeature::OpenMP) ||
+      context.languageFeatures().IsEnabled(common::LanguageFeature::CUDA)) {
+    StatementSemanticsPass3{context}.Walk(program);
+  }
   if (!context.AnyFatalError()) {
     pass2.CompileDataInitializationsIntoInitializers();
   }
@@ -476,11 +485,12 @@ void SemanticsContext::UsePPCFortranBuiltinTypesModule() {
   }
 }
 
-const Scope *SemanticsContext::GetCUDABuiltinsScope() {
-  if (!CUDABuiltinsScope_) {
-    CUDABuiltinsScope_ = GetBuiltinModule("__cuda_builtins");
+const Scope &SemanticsContext::GetCUDABuiltinsScope() {
+  if (!cudaBuiltinsScope_) {
+    cudaBuiltinsScope_ = GetBuiltinModule("__cuda_builtins");
+    CHECK(cudaBuiltinsScope_.value() != nullptr);
   }
-  return *CUDABuiltinsScope_;
+  return **cudaBuiltinsScope_;
 }
 
 void SemanticsContext::UsePPCFortranBuiltinsModule() {
@@ -525,6 +535,7 @@ bool Semantics::Perform() {
       parser::CanonicalizeDo(program_) && // force line break
       CanonicalizeAcc(context_.messages(), program_) &&
       CanonicalizeOmp(context_.messages(), program_) &&
+      CanonicalizeCUDA(program_) &&
       PerformStatementSemantics(context_, program_) &&
       ModFileWriter{context_}.WriteAll();
 }
@@ -566,7 +577,7 @@ void DoDumpSymbols(llvm::raw_ostream &os, const Scope &scope, int indent) {
   if (scope.derivedTypeSpec()) {
     os << " instantiation of " << *scope.derivedTypeSpec();
   }
-  os << '\n';
+  os << " sourceRange=" << scope.sourceRange().size() << " bytes\n";
   ++indent;
   for (const auto &pair : scope) {
     const auto &symbol{*pair.second};

diff  --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90
index 0ff35ccbbbc2f..295ebbef13223 100644
--- a/flang/module/__fortran_builtins.f90
+++ b/flang/module/__fortran_builtins.f90
@@ -75,4 +75,23 @@
 
   intrinsic :: __builtin_compiler_options, __builtin_compiler_version
 
+  interface operator(==)
+    module procedure __builtin_c_ptr_eq
+  end interface
+  interface operator(/=)
+    module procedure __builtin_c_ptr_eq
+  end interface
+
+contains
+
+  elemental logical function __builtin_c_ptr_eq(x, y)
+    type(__builtin_c_ptr), intent(in) :: x, y
+    __builtin_c_ptr_eq = x%__address == y%__address
+  end function
+
+  elemental logical function __builtin_c_ptr_ne(x, y)
+    type(__builtin_c_ptr), intent(in) :: x, y
+    __builtin_c_ptr_ne = x%__address /= y%__address
+  end function
+
 end module

diff  --git a/flang/module/iso_c_binding.f90 b/flang/module/iso_c_binding.f90
index 68035579ec620..a839d949c97ce 100644
--- a/flang/module/iso_c_binding.f90
+++ b/flang/module/iso_c_binding.f90
@@ -15,7 +15,8 @@ module iso_c_binding
     c_ptr => __builtin_c_ptr, &
     c_funptr => __builtin_c_funptr, &
     c_sizeof => sizeof, &
-    c_loc => __builtin_c_loc
+    c_loc => __builtin_c_loc, &
+    operator(==), operator(/=)
 
   type(c_ptr), parameter :: c_null_ptr = c_ptr(0)
   type(c_funptr), parameter :: c_null_funptr = c_funptr(0)

diff  --git a/flang/test/Driver/compiler_options.f90 b/flang/test/Driver/compiler_options.f90
index 7b904ca71f4c3..c329a5033884b 100644
--- a/flang/test/Driver/compiler_options.f90
+++ b/flang/test/Driver/compiler_options.f90
@@ -1,6 +1,6 @@
 ! RUN: %flang -S -emit-llvm -o - %s | FileCheck %s
 ! Test communication of COMPILER_OPTIONS from flang-new to flang-new -fc1.
-! CHECK: [[OPTSVAR:@_QQcl\.[0-9a-f]+]] = internal constant [[[OPTSLEN:[0-9]+]] x i8] c"{{.*}}flang-new{{(\.exe)?}} -S -emit-llvm -o - {{.*}}compiler_options.f90"
+! CHECK: [[OPTSVAR:@_QQcl\.[0-9a-f]+]] = {{[a-z]+}} constant [[[OPTSLEN:[0-9]+]] x i8] c"{{.*}}flang-new{{(\.exe)?}} -S -emit-llvm -o - {{.*}}compiler_options.f90"
 program main
     use ISO_FORTRAN_ENV, only: compiler_options
     implicit none

diff  --git a/flang/test/Parser/cuf-sanity-unparse.CUF b/flang/test/Parser/cuf-sanity-unparse.CUF
index d4495c4fddccf..7ac39448d7bd4 100644
--- a/flang/test/Parser/cuf-sanity-unparse.CUF
+++ b/flang/test/Parser/cuf-sanity-unparse.CUF
@@ -34,9 +34,9 @@ include "cuf-sanity-common"
 !CHECK:   !$CUF KERNEL DO <<<1_4,(2_4,3_4),STREAM=1_4>>>
 !CHECK:   DO j=1_4,10_4
 !CHECK:   END DO
-!CHECK:    CALL globalsub<<<1_4,2_4>>>
-!CHECK:    CALL globalsub<<<1_4,2_4,3_4>>>
-!CHECK:    CALL globalsub<<<1_4,2_4,3_4,4_4>>>
+!CHECK:    CALL globalsub<<<1_4,2_4>>>()
+!CHECK:    CALL globalsub<<<1_4,2_4,3_4>>>()
+!CHECK:    CALL globalsub<<<1_4,2_4,3_4,4_4>>>()
 !CHECK:   ALLOCATE(pa(32_4), STREAM=1_4, PINNED=ispinned)
 !CHECK:  END SUBROUTINE
 !CHECK: END MODULE

diff  --git a/flang/test/Semantics/cuf04.cuf b/flang/test/Semantics/cuf04.cuf
new file mode 100644
index 0000000000000..2e2faa90b490d
--- /dev/null
+++ b/flang/test/Semantics/cuf04.cuf
@@ -0,0 +1,24 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+! CUDA Fortran section 2.5.6 restrictions
+module m
+ contains
+  attributes(device) subroutine devsubr(n)
+    integer, intent(in) :: n
+    !WARNING: 'x1' should not have the SAVE attribute or initialization in a device subprogram
+    real, save :: x1
+    !WARNING: 'x2' should not have the SAVE attribute or initialization in a device subprogram
+    real :: x2 = 1.
+    !ERROR: Device subprogram 'devsubr' cannot call itself
+    if (n > 0) call devsubr(n-1)
+  end subroutine
+  attributes(global) subroutine globsubr
+  end subroutine
+  subroutine boring
+  end subroutine
+  subroutine test
+    !ERROR: 'globsubr' is a kernel subroutine and must be called with kernel launch parameters in chevrons
+    call globsubr
+    !ERROR: Kernel launch parameters in chevrons may not be used unless calling a kernel subroutine
+    call boring<<<1,2>>>
+  end subroutine
+end module

diff  --git a/flang/test/Semantics/cuf05.cuf b/flang/test/Semantics/cuf05.cuf
new file mode 100644
index 0000000000000..a9fd826d53c0b
--- /dev/null
+++ b/flang/test/Semantics/cuf05.cuf
@@ -0,0 +1,19 @@
+! RUN: %flang_fc1 -fdebug-dump-symbols %s 2>&1 | FileCheck --dump-input-context=500 %s
+!CHECK: Global scope: size=0 alignment=1 sourceRange=0 bytes
+!CHECK: IntrinsicModules scope: size=0 alignment=1 sourceRange=0 bytes
+!CHECK: Module scope: __fortran_builtins
+!CHECK: Module scope: __cuda_builtins size=0 alignment=1
+!CHECK: Module scope: __fortran_type_info
+!CHECK: Module scope: m size=0 alignment=1
+!CHECK: Subprogram scope: devsubr size=0 alignment=1
+module m
+  implicit none
+ contains
+  attributes(device) subroutine devsubr()
+  !CHECK: blockdim: Use from blockdim in __cuda_builtins
+  !CHECK: blockidx: Use from blockidx in __cuda_builtins
+  !CHECK: griddim: Use from griddim in __cuda_builtins
+  !CHECK: threadidx: Use from threadidx in __cuda_builtins
+  !CHECK: warpsize: Use from warpsize in __cuda_builtins
+  end subroutine
+end module

diff  --git a/flang/test/Semantics/cuf06.cuf b/flang/test/Semantics/cuf06.cuf
new file mode 100644
index 0000000000000..44dd3c7ccc4c4
--- /dev/null
+++ b/flang/test/Semantics/cuf06.cuf
@@ -0,0 +1,15 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+module m
+  use, intrinsic :: __fortran_builtins, only: __builtin_dim3
+ contains
+  attributes(global) subroutine kernel
+  end subroutine
+  subroutine test
+    call kernel<<< 1, 32 >>> ! ok
+    call kernel<<< __builtin_dim3(1,1), __builtin_dim3(32,1,1) >>> ! ok
+    !ERROR: Kernel launch grid parameter must be either integer or TYPE(dim3)
+    call kernel<<< 1.d0, 32 >>>
+    !ERROR: Kernel launch block parameter must be either integer or TYPE(dim3)
+    call kernel<<< 1, "abc" >>>
+  end
+end module

diff  --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf
new file mode 100644
index 0000000000000..dd70c3b1ff5ef
--- /dev/null
+++ b/flang/test/Semantics/cuf09.cuf
@@ -0,0 +1,76 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+module m
+ contains
+  attributes(device) subroutine devsub
+    !ERROR: Statement may not appear in device code
+    !$cuf kernel do <<< 1, 2 >>>
+    do k=1,10
+    end do
+  end
+end
+
+program main
+  !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops
+  !$cuf kernel do <<< 1, 2 >>>
+  do while (.false.)
+  end do
+  !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops
+  !$cuf kernel do <<< 1, 2 >>>
+  do
+    exit
+  end do
+  !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops
+  !$cuf kernel do <<< 1, 2 >>>
+  do concurrent (j=1:10)
+  end do
+  !$cuf kernel do <<< 1, 2 >>>
+  do 1 j=1,10
+1 continue ! ok
+  !$cuf kernel do <<< 1, 2 >>>
+  do j=1,10
+  end do ! ok
+  !$cuf kernel do <<< 1, 2 >>>
+  do j=1,10
+    !ERROR: Statement may not appear in device code
+    !$cuf kernel do <<< 1, 2 >>>
+    do k=1,10
+    end do
+  end do
+  !ERROR: !$CUF KERNEL DO (-1): loop nesting depth must be positive
+  !$cuf kernel do (-1) <<< 1, 2 >>>
+  do j=1,10
+  end do
+  !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops
+  !$cuf kernel do <<< 1, 2 >>>
+  continue
+  !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops
+  !$cuf kernel do (2) <<< 1, 2 >>>
+  do j=1,10
+  end do
+  !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops
+  !$cuf kernel do (2) <<< 1, 2 >>>
+  do j=1,10
+    continue
+  end do
+  !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops
+  !$cuf kernel do (2) <<< 1, 2 >>>
+  do j=1,10
+    do k=1,10
+    end do
+    continue
+  end do
+  !$cuf kernel do <<< 1, 2 >>>
+  do j = 1, 10
+    !ERROR: 'foo' may not be called in device code
+    call foo
+    !ERROR: 'bar' may not be called in device code
+    x = bar()
+    !ERROR: 'ifunc' may not be called in device code
+    if (ifunc() /= 0) continue
+    !ERROR: 'ifunc' may not be called in device code
+    if (ifunc() /= 0) then
+    !ERROR: 'ifunc' may not be called in device code
+    else if (ifunc() /= 1) then
+    end if
+  end do
+end

diff  --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index b76d01c284a36..d4e882d3e4ec5 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -10,6 +10,7 @@ set(MODULES
   "__fortran_type_info"
   "__fortran_ppc_types"
   "__fortran_ppc_intrinsics"
+  "__cuda_builtins"
   "ieee_arithmetic"
   "ieee_exceptions"
   "ieee_features"

diff  --git a/flang/unittests/Runtime/Time.cpp b/flang/unittests/Runtime/Time.cpp
index ceccb4a70805c..ec0caa743bcdf 100644
--- a/flang/unittests/Runtime/Time.cpp
+++ b/flang/unittests/Runtime/Time.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef __clang__ // 16.0.3 lacks <charconv>
+
 #include "gtest/gtest.h"
 #include "flang/Runtime/time-intrinsic.h"
 #include <algorithm>
@@ -166,3 +168,4 @@ TEST(TimeIntrinsics, DateAndTime) {
     EXPECT_LE(minutes, 59);
   }
 }
+#endif // __clang__


        


More information about the flang-commits mailing list