[llvm] [Frontend][OpenMP] Implement directive name parser (PR #146776)

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 2 13:59:44 PDT 2025


https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/146776

>From 0e9eab649f7a515c0697c3fe58309c478108f6b1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Wed, 2 Jul 2025 09:43:32 -0500
Subject: [PATCH] [Frontend][OpenMP] Implement directive name parser

Implement a state machine that consumes tokens (words delimited by white
space), and returns the corresponding directive id, or fails if the tokens
did not form a valid name.
---
 .../Frontend/OpenMP/DirectiveNameParser.h     |  76 ++++++++
 llvm/lib/Frontend/OpenMP/CMakeLists.txt       |   1 +
 .../Frontend/OpenMP/DirectiveNameParser.cpp   |  93 ++++++++++
 llvm/unittests/Frontend/CMakeLists.txt        |   1 +
 .../OpenMPDirectiveNameParserTest.cpp         | 171 ++++++++++++++++++
 5 files changed, 342 insertions(+)
 create mode 100644 llvm/include/llvm/Frontend/OpenMP/DirectiveNameParser.h
 create mode 100644 llvm/lib/Frontend/OpenMP/DirectiveNameParser.cpp
 create mode 100644 llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp

diff --git a/llvm/include/llvm/Frontend/OpenMP/DirectiveNameParser.h b/llvm/include/llvm/Frontend/OpenMP/DirectiveNameParser.h
new file mode 100644
index 0000000000000..db8986601b2ca
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/DirectiveNameParser.h
@@ -0,0 +1,76 @@
+//===- DirectiveNameParser.h  ------------------------------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FRONTEND_OPENMP_DIRECTIVENAMEPARSER_H
+#define LLVM_FRONTEND_OPENMP_DIRECTIVENAMEPARSER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+
+#include <memory>
+
+namespace llvm::omp {
+/// Parser class for OpenMP directive names. It only recognizes names listed
+/// in OMP.td, in particular it does not recognize Fortran's end-directives
+/// if they are not explicitly listed in OMP.td.
+///
+/// The parser never changes once constructed, so one instance can be shared.
+///
+/// Usage:
+/// {
+///   DirectiveNameParser Parser;   // Could be static const.
+///   const DirectiveNameParser::State *S = Parser.initial();
+///   for (StringRef Token : Tokens)
+///     S = Parser.apply(S, Token); // Passing nullptr is ok.
+///   if (S == nullptr) {
+///     // Error: ended up in a state from which there is no possible path
+///     // to a successful parse.
+///   } else if (S->Value == OMPD_unknown) {
+///     // Parsed a sequence of tokens that are not a complete name, but
+///     // parsing more tokens could lead to a successful parse.
+///   } else {
+///     // Success.
+///     ParsedId = S->Value;
+///   }
+/// }
+struct DirectiveNameParser {
+  DirectiveNameParser(SourceLanguage L = SourceLanguage::C);
+
+  struct State {
+    Directive Value = Directive::OMPD_unknown; // Named by the tokens so far.
+
+  private:
+    using TransitionMapTy = StringMap<State>;
+    std::unique_ptr<TransitionMapTy> Transition; // Null until first insertion.
+
+    State *next(StringRef Tok);
+    // The transition map is allocated lazily, so it may still be null here.
+    bool isValid() const {
+      return Value != Directive::OMPD_unknown ||
+             (Transition && !Transition->empty());
+    }
+    friend struct DirectiveNameParser;
+  };
+
+  const State *initial() const { return &InitialState; }
+  /// State reached from Current on Tok, or null if there is none.
+  const State *apply(const State *Current, StringRef Tok) const;
+
+  static SmallVector<StringRef> tokenize(StringRef N); // Splits at ' '.
+
+private:
+  void insertName(StringRef Name, Directive D);
+  State *insertTransition(State *From, StringRef Tok);
+
+  State InitialState;
+};
+} // namespace llvm::omp
+
+#endif // LLVM_FRONTEND_OPENMP_DIRECTIVENAMEPARSER_H
diff --git a/llvm/lib/Frontend/OpenMP/CMakeLists.txt b/llvm/lib/Frontend/OpenMP/CMakeLists.txt
index 5bf15ca3a8991..e60b59c1203b9 100644
--- a/llvm/lib/Frontend/OpenMP/CMakeLists.txt
+++ b/llvm/lib/Frontend/OpenMP/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMFrontendOpenMP
   OMP.cpp
   OMPContext.cpp
   OMPIRBuilder.cpp
+  DirectiveNameParser.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend
diff --git a/llvm/lib/Frontend/OpenMP/DirectiveNameParser.cpp b/llvm/lib/Frontend/OpenMP/DirectiveNameParser.cpp
new file mode 100644
index 0000000000000..02ff8327a3054
--- /dev/null
+++ b/llvm/lib/Frontend/OpenMP/DirectiveNameParser.cpp
@@ -0,0 +1,93 @@
+//===- DirectiveNameParser.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/OpenMP/DirectiveNameParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+
+#include <cassert>
+#include <memory>
+
+namespace llvm::omp {
+DirectiveNameParser::DirectiveNameParser(SourceLanguage L) {
+  for (size_t Idx = 0; Idx != Directive_enumSize; ++Idx) {
+    const auto Dir = static_cast<Directive>(Idx);
+    if (Dir == Directive::OMPD_unknown)
+      continue; // Not an actual directive, there is no name to register.
+    if (!(getDirectiveLanguages(Dir) & L))
+      continue; // Directive is not available in any requested language.
+    for (unsigned Version : getOpenMPVersions()) // One spelling per version.
+      insertName(getOpenMPDirectiveName(Dir, Version), Dir);
+  }
+}
+
+const DirectiveNameParser::State *
+DirectiveNameParser::apply(const State *Current, StringRef Tok) const {
+  if (Current == nullptr)
+    return nullptr; // A null input state is passed through unchanged.
+  assert(Current->isValid() && "Invalid input state");
+  // State::next is declared non-const, but it only performs a lookup.
+  const State *Target = const_cast<State *>(Current)->next(Tok);
+  return Target != nullptr && Target->isValid() ? Target : nullptr;
+}
+
+SmallVector<StringRef> DirectiveNameParser::tokenize(StringRef Str) {
+  // Break Str into maximal runs of characters other than ' '. No other
+  // character is treated as a separator, and no empty tokens are produced.
+  SmallVector<StringRef> Pieces;
+  const size_t Len = Str.size();
+
+  // Index of the first non-space character at or after Pos, or Len.
+  auto skipSpaces = [&](size_t Pos) {
+    while (Pos < Len && Str[Pos] == ' ')
+      ++Pos;
+    return Pos;
+  };
+
+  for (size_t Begin = skipSpaces(0); Begin != Len;) {
+    // A token extends up to the next space, or to the end of the string.
+    size_t End = Str.find(' ', Begin);
+    if (End == StringRef::npos)
+      End = Len;
+    Pieces.push_back(Str.substr(Begin, End - Begin));
+    Begin = skipSpaces(End);
+  }
+
+  return Pieces;
+}
+
+void DirectiveNameParser::insertName(StringRef Name, Directive D) {
+  // Follow (creating as needed) one transition per token of Name, starting
+  // at the initial state, then record D in the state reached at the end.
+  State *Last = &InitialState;
+  for (StringRef Word : tokenize(Name))
+    Last = insertTransition(Last, Word);
+  Last->Value = D;
+}
+
+DirectiveNameParser::State *
+DirectiveNameParser::insertTransition(State *From, StringRef Tok) {
+  // Return the state reached from From on Tok, adding it if it is missing.
+  // The result points into From's transition map, which From owns.
+  assert(From && "Expecting state");
+  // The transition map is allocated on first use.
+  if (!From->Transition)
+    From->Transition = std::make_unique<State::TransitionMapTy>();
+
+  // try_emplace finds an existing entry or default-constructs a new one, so
+  // no separate lookup is needed before inserting.
+  return &From->Transition->try_emplace(Tok).first->second;
+}
+
+DirectiveNameParser::State *DirectiveNameParser::State::next(StringRef Tok) {
+  if (Transition == nullptr)
+    return nullptr; // No map allocated: no outgoing transitions at all.
+  auto Hit = Transition->find(Tok);
+  return Hit == Transition->end() ? nullptr : &Hit->second; // Null if absent.
+}
+} // namespace llvm::omp
diff --git a/llvm/unittests/Frontend/CMakeLists.txt b/llvm/unittests/Frontend/CMakeLists.txt
index 2412cc9d26c7a..281d509227a46 100644
--- a/llvm/unittests/Frontend/CMakeLists.txt
+++ b/llvm/unittests/Frontend/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_unittest(LLVMFrontendTests
   OpenMPCompositionTest.cpp
   OpenMPDecompositionTest.cpp
   OpenMPDirectiveNameTest.cpp
+  OpenMPDirectiveNameParserTest.cpp
 
   DEPENDS
   acc_gen
diff --git a/llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp b/llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp
new file mode 100644
index 0000000000000..11fef684dec4c
--- /dev/null
+++ b/llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp
@@ -0,0 +1,171 @@
+//===- llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Frontend/OpenMP/DirectiveNameParser.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+#include "gtest/gtest.h"
+
+#include <cctype>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+using namespace llvm;
+
+static const omp::DirectiveNameParser &getParser() {
+  using Lang = omp::SourceLanguage; // Accept C and Fortran names alike.
+  static omp::DirectiveNameParser Shared(Lang::C | Lang::Fortran);
+  return Shared;
+}
+
+static std::vector<std::string> tokenize(StringRef S) {
+  // Split S at whitespace: operator>> skips leading whitespace and then
+  // reads up to the next whitespace character. Extracting words directly
+  // avoids std::istream_iterator, whose header (<iterator>) is not included
+  // by this file.
+  std::vector<std::string> Tokens;
+  std::istringstream Stream(S.str());
+  for (std::string Word; Stream >> Word;)
+    Tokens.push_back(Word);
+  return Tokens;
+}
+
+static std::string &prepareParamName(std::string &Name) {
+  // The parameter name must only have alphanumeric characters, so replace
+  // all others with 'X'. The cast keeps isalnum defined for negative chars.
+  for (char &C : Name)
+    if (!isalnum(static_cast<unsigned char>(C)))
+      C = 'X';
+  return Name;
+}
+
+namespace llvm {
+template <> struct enum_iteration_traits<omp::Directive> {
+  static constexpr bool is_iterable = true; // Needed by llvm::enum_seq below.
+};
+} // namespace llvm
+
+// Test tokenizing: the parser's tokenizer must agree with the stream-based
+// one above for the name of every directive.
+class Tokenize : public testing::TestWithParam<omp::Directive> {};
+
+static bool isEqual(const SmallVector<StringRef> &A,
+                    const std::vector<std::string> &B) {
+  // Two token lists match when they have the same length and the tokens at
+  // each position spell the same text.
+  const size_t Count = A.size();
+  bool Same = Count == B.size();
+
+  for (size_t Pos = 0; Same && Pos != Count; ++Pos)
+    Same = A[Pos] == StringRef(B[Pos]);
+  return Same;
+}
+
+TEST_P(Tokenize, T) {
+  // Both tokenizers must split the directive's name into the same tokens.
+  const omp::Directive Id = GetParam();
+  const StringRef Spelling =
+      omp::getOpenMPDirectiveName(Id, omp::FallbackVersion);
+  const auto FromParser = omp::DirectiveNameParser::tokenize(Spelling);
+  ASSERT_TRUE(isEqual(FromParser, tokenize(Spelling)));
+}
+
+static std::string
+getParamName1(const testing::TestParamInfo<Tokenize::ParamType> &Info) {
+  // Label the instance with the sanitized FallbackVersion directive name.
+  std::string Label(
+      omp::getOpenMPDirectiveName(Info.param, omp::FallbackVersion));
+  return prepareParamName(Label);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    DirectiveNameParserTest, Tokenize, // Run for every omp::Directive value.
+    testing::ValuesIn(
+        llvm::enum_seq(static_cast<omp::Directive>(0),
+                       static_cast<omp::Directive>(omp::Directive_enumSize))),
+    getParamName1);
+
+// Test parsing of valid names.
+
+// Test parameter: a directive id paired with an OpenMP version.
+using ValueType = std::tuple<omp::Directive, unsigned>;
+class ParseValid : public testing::TestWithParam<ValueType> {};
+
+TEST_P(ParseValid, T) {
+  const auto [Id, Ver] = GetParam();
+  if (Id == omp::Directive::OMPD_unknown)
+    return; // OMPD_unknown is what an incomplete parse reports; skip it.
+
+  // Feed the tokens of the name to the parser one by one; every prefix of a
+  // valid name must leave the parser in a non-null state.
+  const omp::DirectiveNameParser &Parser = getParser();
+  const omp::DirectiveNameParser::State *Current = Parser.initial();
+  ASSERT_TRUE(Current != nullptr);
+
+  const std::string Spelling = omp::getOpenMPDirectiveName(Id, Ver).str();
+  for (const std::string &Word : tokenize(Spelling)) {
+    Current = Parser.apply(Current, Word);
+    ASSERT_TRUE(Current != nullptr);
+  }
+
+  // After the last token the state must identify the original directive.
+  ASSERT_EQ(Current->Value, Id);
+}
+
+static std::string
+getParamName2(const testing::TestParamInfo<ParseValid::ParamType> &Info) {
+  // Label the instance "<name>v<version>", with non-alphanumerics replaced.
+  const auto &[Id, Ver] = Info.param;
+  std::string Label = omp::getOpenMPDirectiveName(Id, Ver).str();
+  return prepareParamName(Label.append("v").append(std::to_string(Ver)));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    DirectiveNameParserTest, ParseValid, // Every directive in every version.
+    testing::Combine(testing::ValuesIn(llvm::enum_seq(
+                         static_cast<omp::Directive>(0),
+                         static_cast<omp::Directive>(omp::Directive_enumSize))),
+                     testing::ValuesIn(omp::getOpenMPVersions())),
+    getParamName2);
+
+// Test parsing of invalid names: token sequences that must not end in a
+// state denoting a directive.
+class ParseInvalid : public testing::TestWithParam<std::string> {};
+
+TEST_P(ParseInvalid, T) {
+  const omp::DirectiveNameParser &Parser = getParser();
+  const omp::DirectiveNameParser::State *Current = Parser.initial();
+  ASSERT_TRUE(Current != nullptr);
+  // apply() accepts a null state, so no per-token check is needed here.
+  for (const std::string &Word : tokenize(GetParam()))
+    Current = Parser.apply(Current, Word);
+
+  // Either the parse failed outright, or it stopped on an incomplete name.
+  const bool Rejected =
+      Current == nullptr || Current->Value == omp::Directive::OMPD_unknown;
+  ASSERT_TRUE(Rejected);
+}
+
+namespace {
+// Only the ""s literal suffix is needed here, so do not import all of std.
+using namespace std::string_literals;
+INSTANTIATE_TEST_SUITE_P(DirectiveNameParserTest, ParseInvalid,
+                         testing::Values(
+                             // Names with a token that is invalid in context
+                             "bad"s, "target teams invalid"s,
+                             "target sections parallel"s,
+                             "target teams distribute parallel for wrong"s,
+                             // Valid beginning, but not a complete name
+                             "begin declare"s,
+                             // Complete name with extra tokens
+                             "distribute simd target"s));
+} // namespace



More information about the llvm-commits mailing list