[clang] 08a2207 - Reapply "[OpenMP] Add the `ompx_attribute` clause for target directives"

Tue Jul 25 10:46:38 PDT 2023

Author: Johannes Doerfert
Date: 2023-07-25T10:40:35-07:00
New Revision: 08a220764b1e266c4694f614fd4fda7bd2122580

URL: https://github.com/llvm/llvm-project/commit/08a220764b1e266c4694f614fd4fda7bd2122580
DIFF: https://github.com/llvm/llvm-project/commit/08a220764b1e266c4694f614fd4fda7bd2122580.diff

LOG: Reapply "[OpenMP] Add the `ompx_attribute` clause for target directives"

This reverts commit 0d12683046ca75fb08e285f4622f2af5c82609dc and
reapplies ef9ec4bbcca2fa4f64df47bc426f1d1c59ea47e2 with an extension to
fix the Flang build.

Differential Revision: https://reviews.llvm.org/D156184

Added: 
    clang/test/OpenMP/ompx_attributes_codegen.cpp
    clang/test/OpenMP/ompx_attributes_messages.cpp

Modified: 
    clang/include/clang/AST/OpenMPClause.h
    clang/include/clang/AST/RecursiveASTVisitor.h
    clang/include/clang/Basic/DiagnosticGroups.td
    clang/include/clang/Basic/DiagnosticParseKinds.td
    clang/include/clang/Parse/Parser.h
    clang/include/clang/Sema/Sema.h
    clang/lib/AST/OpenMPClause.cpp
    clang/lib/AST/StmtProfile.cpp
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/lib/CodeGen/CodeGenModule.h
    clang/lib/CodeGen/Targets/AMDGPU.cpp
    clang/lib/CodeGen/Targets/NVPTX.cpp
    clang/lib/Parse/ParseOpenMP.cpp
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/lib/Sema/SemaOpenMP.cpp
    clang/lib/Sema/TreeTransform.h
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTWriter.cpp
    clang/tools/libclang/CIndex.cpp
    flang/lib/Semantics/check-omp-structure.cpp
    llvm/include/llvm/Frontend/OpenMP/OMP.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 0bea21270692cf..31ae3d42e232fc 100644

--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -9172,6 +9172,54 @@ class OMPDoacrossClause final
   }
 };
 
+/// This represents 'ompx_attribute' clause in a directive that might generate
+/// an outlined function. An example is given below.
+///
+/// \code
+/// #pragma omp target [...] ompx_attribute(flatten)
+/// \endcode
+class OMPXAttributeClause
+    : public OMPNoChildClause<llvm::omp::OMPC_ompx_attribute> {
+  friend class OMPClauseReader;
+
+  /// Location of '('.
+  SourceLocation LParenLoc;
+
+  /// The parsed attributes (clause arguments)
+  SmallVector<const Attr *> Attrs;
+
+public:
+  /// Build 'ompx_attribute' clause.
+  ///
+  /// \param Attrs The parsed attributes (clause arguments)
+  /// \param StartLoc Starting location of the clause.
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc Ending location of the clause.
+  OMPXAttributeClause(ArrayRef<const Attr *> Attrs, SourceLocation StartLoc,
+                      SourceLocation LParenLoc, SourceLocation EndLoc)
+      : OMPNoChildClause(StartLoc, EndLoc), LParenLoc(LParenLoc), Attrs(Attrs) {
+  }
+
+  /// Build an empty clause.
+  OMPXAttributeClause() : OMPNoChildClause() {}
+
+  /// Sets the location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+  /// Returns the location of '('.
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+
+  /// Returned the attributes parsed from this clause.
+  ArrayRef<const Attr *> getAttrs() const { return Attrs; }
+
+private:
+  /// Replace the attributes with \p NewAttrs.
+  void setAttrs(ArrayRef<Attr *> NewAttrs) {
+    Attrs.clear();
+    Attrs.append(NewAttrs.begin(), NewAttrs.end());
+  }
+};
+
 } // namespace clang
 
 #endif // LLVM_CLANG_AST_OPENMPCLAUSE_H

diff  --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 604875cd6337a4..fc2d1ff708bf7a 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3875,6 +3875,12 @@ bool RecursiveASTVisitor<Derived>::VisitOMPDoacrossClause(
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPXAttributeClause(
+    OMPXAttributeClause *C) {
+  return true;
+}
+
 // FIXME: look at the following tricky-seeming exprs to see if we
 // need to recurse on anything.  These are ones that have methods
 // returning decls or qualtypes or nestednamespecifier -- though I'm

diff  --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 7b4d415bf06494..6a0a01e4a981a4 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1278,9 +1278,10 @@ def OpenMPMapping : DiagGroup<"openmp-mapping">;
 def OpenMPTarget : DiagGroup<"openmp-target", [OpenMPMapping]>;
 def OpenMPPre51Compat : DiagGroup<"pre-openmp-51-compat">;
 def OpenMP51Ext : DiagGroup<"openmp-51-extensions">;
+def OpenMPExtensions : DiagGroup<"openmp-extensions">;
 def OpenMP : DiagGroup<"openmp", [
     SourceUsesOpenMP, OpenMPClauses, OpenMPLoopForm, OpenMPTarget,
-    OpenMPMapping, OpenMP51Ext
+    OpenMPMapping, OpenMP51Ext, OpenMPExtensions
   ]>;
 
 // Backend warnings.

diff  --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 8d729c31641ed8..a804442ab34ec7 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1540,6 +1540,9 @@ def warn_omp_more_one_omp_all_memory : Warning<
   InGroup<OpenMPClauses>;
 def warn_omp_depend_in_ordered_deprecated : Warning<"'depend' clause for"
   " 'ordered' is deprecated; use 'doacross' instead">, InGroup<Deprecated>;
+def warn_omp_invalid_attribute_for_ompx_attributes : Warning<"'ompx_attribute' clause only allows "
+  "'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; "
+  "%0 is ignored">, InGroup<OpenMPExtensions>;
 
 // Pragma loop support.
 def err_pragma_loop_missing_argument : Error<

diff  --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 475dfe845528d9..b58041477c9ec9 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3490,6 +3490,13 @@ class Parser : public CodeCompletionHandler {
   //
   OMPClause *ParseOpenMPInteropClause(OpenMPClauseKind Kind, bool ParseOnly);
 
+  /// Parses a ompx_attribute clause
+  ///
+  /// \param ParseOnly true to skip the clause's semantic actions and return
+  /// nullptr.
+  //
+  OMPClause *ParseOpenMPOMPXAttributesClause(bool ParseOnly);
+
 public:
   /// Parses simple expression in parens for single-expression clauses of OpenMP
   /// constructs.

diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 3418a37b307785..7c641d5e273a03 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10988,6 +10988,11 @@ class Sema final {
   bool ConstantFoldAttrArgs(const AttributeCommonInfo &CI,
                             MutableArrayRef<Expr *> Args);
 
+  /// Create an CUDALaunchBoundsAttr attribute.
+  CUDALaunchBoundsAttr *CreateLaunchBoundsAttr(const AttributeCommonInfo &CI,
+                                               Expr *MaxThreads,
+                                               Expr *MinBlocks);
+
   /// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular
   /// declaration.
   void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
@@ -11004,11 +11009,21 @@ class Sema final {
   void AddXConsumedAttr(Decl *D, const AttributeCommonInfo &CI,
                         RetainOwnershipKind K, bool IsTemplateInstantiation);
 
+  /// Create an AMDGPUWavesPerEUAttr attribute.
+  AMDGPUFlatWorkGroupSizeAttr *
+  CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min,
+                                    Expr *Max);
+
   /// addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size
   /// attribute to a particular declaration.
   void addAMDGPUFlatWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI,
                                       Expr *Min, Expr *Max);
 
+  /// Create an AMDGPUWavesPerEUAttr attribute.
+  AMDGPUWavesPerEUAttr *
+  CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI, Expr *Min,
+                             Expr *Max);
+
   /// addAMDGPUWavePersEUAttr - Adds an amdgpu_waves_per_eu attribute to a
   /// particular declaration.
   void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
@@ -12341,6 +12356,12 @@ class Sema final {
                             ArrayRef<Expr *> VarList, SourceLocation StartLoc,
                             SourceLocation LParenLoc, SourceLocation EndLoc);
 
+  /// Called on a well-formed 'ompx_attribute' clause.
+  OMPClause *ActOnOpenMPXAttributeClause(ArrayRef<const Attr *> Attrs,
+                                         SourceLocation StartLoc,
+                                         SourceLocation LParenLoc,
+                                         SourceLocation EndLoc);
+
   /// The kind of conversion being performed.
   enum CheckedConversionKind {
     /// An implicit conversion.

diff  --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 4c895822ffdf85..f5ad75028a641e 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -2534,6 +2534,18 @@ void OMPClausePrinter::VisitOMPDoacrossClause(OMPDoacrossClause *Node) {
   OS << ")";
 }
 
+void OMPClausePrinter::VisitOMPXAttributeClause(OMPXAttributeClause *Node) {
+  OS << "ompx_attribute(";
+  bool IsFirst = true;
+  for (auto &Attr : Node->getAttrs()) {
+    if (!IsFirst)
+      OS << ", ";
+    Attr->printPretty(OS, Policy);
+    IsFirst = false;
+  }
+  OS << ")";
+}
+
 void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
                                          VariantMatchInfo &VMI) const {
   for (const OMPTraitSet &Set : Sets) {

diff  --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index d8a667b2d0fdc4..60646f7a0da57c 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -928,6 +928,8 @@ void OMPClauseProfiler::VisitOMPXDynCGroupMemClause(
 void OMPClauseProfiler::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
   VisitOMPClauseList(C);
 }
+void OMPClauseProfiler::VisitOMPXAttributeClause(const OMPXAttributeClause *C) {
+}
 } // namespace
 
 void

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a52ec8909b12ac..a4f7eb96c0d9bc 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6110,8 +6110,23 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
                                       DefaultValTeams, DefaultValThreads,
                                       IsOffloadEntry, OutlinedFn, OutlinedFnID);
 
-  if (OutlinedFn != nullptr)
-    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
+  if (!OutlinedFn)
+    return;
+
+  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
+
+  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
+    for (auto *A : C->getAttrs()) {
+      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
+        CGM.handleCUDALaunchBoundsAttr(OutlinedFn, Attr);
+      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
+        CGM.handleAMDGPUFlatWorkGroupSizeAttr(OutlinedFn, Attr);
+      else if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
+        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
+      else
+        llvm_unreachable("Unexpected attribute kind");
+    }
+  }
 }
 
 /// Checks if the expression is constant or does not have non-trivial function

diff  --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 05cb217e2bee4e..f5fd94492540f2 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1557,6 +1557,21 @@ class CodeGenModule : public CodeGenTypeCache {
   /// because we'll lose all important information after each repl.
   void moveLazyEmissionStates(CodeGenModule *NewBuilder);
 
+  /// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F.
+  void handleCUDALaunchBoundsAttr(llvm::Function *F,
+                                  const CUDALaunchBoundsAttr *A);
+
+  /// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute
+  /// to \p F. Alternatively, the work group size can be taken from a \p
+  /// ReqdWGS.
+  void handleAMDGPUFlatWorkGroupSizeAttr(
+      llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A,
+      const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr);
+
+  /// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F.
+  void handleAMDGPUWavesPerEUAttr(llvm::Function *F,
+                                  const AMDGPUWavesPerEUAttr *A);
+
 private:
   llvm::Constant *GetOrCreateLLVMFunction(
       StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,

diff  --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 796a2be81a09c7..bac7787643e33a 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -317,26 +317,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
 
   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
   if (ReqdWGS || FlatWGS) {
-    unsigned Min = 0;
-    unsigned Max = 0;
-    if (FlatWGS) {
-      Min = FlatWGS->getMin()
-                ->EvaluateKnownConstInt(M.getContext())
-                .getExtValue();
-      Max = FlatWGS->getMax()
-                ->EvaluateKnownConstInt(M.getContext())
-                .getExtValue();
-    }
-    if (ReqdWGS && Min == 0 && Max == 0)
-      Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
-
-    if (Min != 0) {
-      assert(Min <= Max && "Min must be less than or equal Max");
-
-      std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
-      F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
-    } else
-      assert(Max == 0 && "Max must be zero");
+    M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
   } else if (IsOpenCLKernel || IsHIPKernel) {
     // By default, restrict the maximum size to a value specified by
     // --gpu-max-threads-per-block=n or its default value for HIP.
@@ -349,24 +330,8 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
     F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
   }
 
-  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
-    unsigned Min =
-        Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
-    unsigned Max = Attr->getMax() ? Attr->getMax()
-                                        ->EvaluateKnownConstInt(M.getContext())
-                                        .getExtValue()
-                                  : 0;
-
-    if (Min != 0) {
-      assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
-
-      std::string AttrVal = llvm::utostr(Min);
-      if (Max != 0)
-        AttrVal = AttrVal + "," + llvm::utostr(Max);
-      F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
-    } else
-      assert(Max == 0 && "Max must be zero");
-  }
+  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
+    M.handleAMDGPUWavesPerEUAttr(F, Attr);
 
   if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
     unsigned NumSGPR = Attr->getNumSGPR();
@@ -595,6 +560,47 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
   return F;
 }
 
+void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
+    llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
+    const ReqdWorkGroupSizeAttr *ReqdWGS) {
+  unsigned Min = 0;
+  unsigned Max = 0;
+  if (FlatWGS) {
+    Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
+    Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue();
+  }
+  if (ReqdWGS && Min == 0 && Max == 0)
+    Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
+
+  if (Min != 0) {
+    assert(Min <= Max && "Min must be less than or equal Max");
+
+    std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
+    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
+  } else
+    assert(Max == 0 && "Max must be zero");
+}
+
+void CodeGenModule::handleAMDGPUWavesPerEUAttr(
+    llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) {
+  unsigned Min =
+      Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
+  unsigned Max =
+      Attr->getMax()
+          ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue()
+          : 0;
+
+  if (Min != 0) {
+    assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
+
+    std::string AttrVal = llvm::utostr(Min);
+    if (Max != 0)
+      AttrVal = AttrVal + "," + llvm::utostr(Max);
+    F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
+  } else
+    assert(Max == 0 && "Max must be zero");
+}
+
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) {
   return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes());

diff  --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 1ca0192333a0f8..0d4bbd79564800 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -71,12 +71,12 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
     return true;
   }
 
-private:
   // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
   // resulting MDNode to the nvvm.annotations MDNode.
   static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                               int Operand);
 
+private:
   static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                            LValue Src) {
     llvm::Value *Handle = nullptr;
@@ -256,24 +256,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
       // Create !{<func-ref>, metadata !"kernel", i32 1} node
       addNVVMMetadata(F, "kernel", 1);
     }
-    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
-      // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
-      llvm::APSInt MaxThreads(32);
-      MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
-      if (MaxThreads > 0)
-        addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
-
-      // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
-      // not specified in __launch_bounds__ or if the user specified a 0 value,
-      // we don't have to add a PTX directive.
-      if (Attr->getMinBlocks()) {
-        llvm::APSInt MinBlocks(32);
-        MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
-        if (MinBlocks > 0)
-          // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
-          addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
-      }
-    }
+    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
+      M.handleCUDALaunchBoundsAttr(F, Attr);
   }
 
   // Attach kernel metadata directly if compiling for NVPTX.
@@ -303,6 +287,28 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
 }
 }
 
+void CodeGenModule::handleCUDALaunchBoundsAttr(
+    llvm::Function *F, const CUDALaunchBoundsAttr *Attr) {
+  // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
+  llvm::APSInt MaxThreads(32);
+  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
+  if (MaxThreads > 0)
+    NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
+                                            MaxThreads.getExtValue());
+
+  // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
+  // not specified in __launch_bounds__ or if the user specified a 0 value,
+  // we don't have to add a PTX directive.
+  if (Attr->getMinBlocks()) {
+    llvm::APSInt MinBlocks(32);
+    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
+    if (MinBlocks > 0)
+      // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
+                                              MinBlocks.getExtValue());
+  }
+}
+
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
   return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());

diff  --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 96d2e2cede6289..66cabb19423348 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3411,6 +3411,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
           << getOpenMPClauseName(CKind) << getOpenMPDirectiveName(DKind);
     SkipUntil(tok::comma, tok::annot_pragma_openmp_end, StopBeforeMatch);
     break;
+  case OMPC_ompx_attribute:
+    Clause = ParseOpenMPOMPXAttributesClause(WrongDirective);
+    break;
   default:
     break;
   }
@@ -3691,6 +3694,62 @@ OMPClause *Parser::ParseOpenMPInteropClause(OpenMPClauseKind Kind,
   llvm_unreachable("Unexpected interop variable clause.");
 }
 
+OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) {
+  SourceLocation Loc = ConsumeToken();
+  // Parse '('.
+  BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+  if (T.expectAndConsume(diag::err_expected_lparen_after,
+                         getOpenMPClauseName(OMPC_ompx_attribute).data()))
+    return nullptr;
+
+  ParsedAttributes ParsedAttrs(AttrFactory);
+  ParseAttributes(PAKM_GNU | PAKM_CXX11, ParsedAttrs);
+
+  // Parse ')'.
+  if (T.consumeClose())
+    return nullptr;
+
+  if (ParseOnly)
+    return nullptr;
+
+  SmallVector<Attr *> Attrs;
+  for (const ParsedAttr &PA : ParsedAttrs) {
+    switch (PA.getKind()) {
+    case ParsedAttr::AT_AMDGPUFlatWorkGroupSize:
+      if (!PA.checkExactlyNumArgs(Actions, 2))
+        continue;
+      if (auto *A = Actions.CreateAMDGPUFlatWorkGroupSizeAttr(
+              PA, PA.getArgAsExpr(0), PA.getArgAsExpr(1)))
+        Attrs.push_back(A);
+      continue;
+    case ParsedAttr::AT_AMDGPUWavesPerEU:
+      if (!PA.checkAtLeastNumArgs(Actions, 1) ||
+          !PA.checkAtMostNumArgs(Actions, 2))
+        continue;
+      if (auto *A = Actions.CreateAMDGPUWavesPerEUAttr(
+              PA, PA.getArgAsExpr(0),
+              PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr))
+        Attrs.push_back(A);
+      continue;
+    case ParsedAttr::AT_CUDALaunchBounds:
+      if (!PA.checkAtLeastNumArgs(Actions, 1) ||
+          !PA.checkAtMostNumArgs(Actions, 2))
+        continue;
+      if (auto *A = Actions.CreateLaunchBoundsAttr(
+              PA, PA.getArgAsExpr(0),
+              PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr))
+        Attrs.push_back(A);
+      continue;
+    default:
+      Diag(Loc, diag::warn_omp_invalid_attribute_for_ompx_attributes) << PA;
+      continue;
+    };
+  }
+
+  return Actions.ActOnOpenMPXAttributeClause(Attrs, Loc, T.getOpenLocation(),
+                                             T.getCloseLocation());
+}
+
 /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'.
 ///
 ///    default-clause:

diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index ed69e802c95dd5..429fa12ff2e293 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5633,21 +5633,28 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E,
   return ValArg.getAs<Expr>();
 }
 
-void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
-                               Expr *MaxThreads, Expr *MinBlocks) {
+CUDALaunchBoundsAttr *
+Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
+                             Expr *MinBlocks) {
   CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks);
   MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0);
   if (MaxThreads == nullptr)
-    return;
+    return nullptr;
 
   if (MinBlocks) {
     MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1);
     if (MinBlocks == nullptr)
-      return;
+      return nullptr;
   }
 
-  D->addAttr(::new (Context)
-                 CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks));
+  return ::new (Context)
+      CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks);
+}
+
+void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
+                               Expr *MaxThreads, Expr *MinBlocks) {
+  if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks))
+    D->addAttr(Attr);
 }
 
 static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
@@ -7862,16 +7869,22 @@ checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr,
   return false;
 }
 
-void Sema::addAMDGPUFlatWorkGroupSizeAttr(Decl *D,
-                                          const AttributeCommonInfo &CI,
-                                          Expr *MinExpr, Expr *MaxExpr) {
+AMDGPUFlatWorkGroupSizeAttr *
+Sema::CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI,
+                                        Expr *MinExpr, Expr *MaxExpr) {
   AMDGPUFlatWorkGroupSizeAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
 
   if (checkAMDGPUFlatWorkGroupSizeArguments(*this, MinExpr, MaxExpr, TmpAttr))
-    return;
+    return nullptr;
+  return ::new (Context)
+      AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr);
+}
 
-  D->addAttr(::new (Context)
-                 AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr));
+void Sema::addAMDGPUFlatWorkGroupSizeAttr(Decl *D,
+                                          const AttributeCommonInfo &CI,
+                                          Expr *MinExpr, Expr *MaxExpr) {
+  if (auto *Attr = CreateAMDGPUFlatWorkGroupSizeAttr(CI, MinExpr, MaxExpr))
+    D->addAttr(Attr);
 }
 
 static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D,
@@ -7916,15 +7929,21 @@ static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr,
   return false;
 }
 
-void Sema::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
-                                   Expr *MinExpr, Expr *MaxExpr) {
+AMDGPUWavesPerEUAttr *
+Sema::CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI, Expr *MinExpr,
+                                 Expr *MaxExpr) {
   AMDGPUWavesPerEUAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
 
   if (checkAMDGPUWavesPerEUArguments(*this, MinExpr, MaxExpr, TmpAttr))
-    return;
+    return nullptr;
+
+  return ::new (Context) AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr);
+}
 
-  D->addAttr(::new (Context)
-                 AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr));
+void Sema::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
+                                   Expr *MinExpr, Expr *MaxExpr) {
+  if (auto *Attr = CreateAMDGPUWavesPerEUAttr(CI, MinExpr, MaxExpr))
+    D->addAttr(Attr);
 }
 
 static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D, const ParsedAttr &AL) {

diff  --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index cf805987b378e3..3954bf2ad28041 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -30,6 +30,7 @@
 #include "clang/Sema/EnterExpressionEvaluationContext.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaInternal.h"
@@ -24093,3 +24094,10 @@ OMPClause *Sema::ActOnOpenMPDoacrossClause(
     DSAStack->addDoacrossDependClause(C, OpsOffs);
   return C;
 }
+
+OMPClause *Sema::ActOnOpenMPXAttributeClause(ArrayRef<const Attr *> Attrs,
+                                             SourceLocation StartLoc,
+                                             SourceLocation LParenLoc,
+                                             SourceLocation EndLoc) {
+  return new (Context) OMPXAttributeClause(Attrs, StartLoc, LParenLoc, EndLoc);
+}

diff  --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 10b3587885e39f..a73b54b668a415 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -2377,6 +2377,18 @@ class TreeTransform {
                                                     EndLoc);
   }
 
+  /// Build a new OpenMP 'ompx_attribute' clause.
+  ///
+  /// By default, performs semantic analysis to build the new OpenMP clause.
+  /// Subclasses may override this routine to provide 
diff erent behavior.
+  OMPClause *RebuildOMPXAttributeClause(ArrayRef<const Attr *> Attrs,
+                                        SourceLocation StartLoc,
+                                        SourceLocation LParenLoc,
+                                        SourceLocation EndLoc) {
+    return getSema().ActOnOpenMPXAttributeClause(Attrs, StartLoc, LParenLoc,
+                                                 EndLoc);
+  }
+
   /// Build a new OpenMP 'align' clause.
   ///
   /// By default, performs semantic analysis to build the new OpenMP clause.
@@ -10756,6 +10768,16 @@ TreeTransform<Derived>::TransformOMPDoacrossClause(OMPDoacrossClause *C) {
       C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
 }
 
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPXAttributeClause(OMPXAttributeClause *C) {
+  SmallVector<const Attr *> NewAttrs;
+  for (auto *A : C->getAttrs())
+    NewAttrs.push_back(getDerived().TransformAttr(A));
+  return getDerived().RebuildOMPXAttributeClause(
+      NewAttrs, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
+}
+
 //===----------------------------------------------------------------------===//
 // Expression transformation
 //===----------------------------------------------------------------------===//

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 5f756961c6e1d0..dcb845dd551e8d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -10370,6 +10370,9 @@ OMPClause *OMPClauseReader::readClause() {
     C = OMPDoacrossClause::CreateEmpty(Context, NumVars, NumLoops);
     break;
   }
+  case llvm::omp::OMPC_ompx_attribute:
+    C = new (Context) OMPXAttributeClause();
+    break;
 #define OMP_CLAUSE_NO_CLASS(Enum, Str)                                         \
   case llvm::omp::Enum:                                                        \
     break;
@@ -11462,6 +11465,15 @@ void OMPClauseReader::VisitOMPDoacrossClause(OMPDoacrossClause *C) {
     C->setLoopData(I, Record.readSubExpr());
 }
 
+void OMPClauseReader::VisitOMPXAttributeClause(OMPXAttributeClause *C) {
+  AttrVec Attrs;
+  Record.readAttributes(Attrs);
+  C->setAttrs(Attrs);
+  C->setLocStart(Record.readSourceLocation());
+  C->setLParenLoc(Record.readSourceLocation());
+  C->setLocEnd(Record.readSourceLocation());
+}
+
 OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() {
   OMPTraitInfo &TI = getContext().getNewOMPTraitInfo();
   TI.Sets.resize(readUInt32());

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 26279d399b53a9..e238ad3d186fba 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7171,6 +7171,13 @@ void OMPClauseWriter::VisitOMPDoacrossClause(OMPDoacrossClause *C) {
     Record.AddStmt(C->getLoopData(I));
 }
 
+void OMPClauseWriter::VisitOMPXAttributeClause(OMPXAttributeClause *C) {
+  Record.AddAttributes(C->getAttrs());
+  Record.AddSourceLocation(C->getBeginLoc());
+  Record.AddSourceLocation(C->getLParenLoc());
+  Record.AddSourceLocation(C->getEndLoc());
+}
+
 void ASTRecordWriter::writeOMPTraitInfo(const OMPTraitInfo *TI) {
   writeUInt32(TI->Sets.size());
   for (const auto &Set : TI->Sets) {

diff  --git a/clang/test/OpenMP/ompx_attributes_codegen.cpp b/clang/test/OpenMP/ompx_attributes_codegen.cpp
new file mode 100644
index 00000000000000..21e9805cbe8293
--- /dev/null
+++ b/clang/test/OpenMP/ompx_attributes_codegen.cpp
@@ -0,0 +1,31 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+
+// Check that the target attributes are set on the generated kernel
+void func() {
+  // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l15() #0
+  // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l17()
+  // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l19() #4
+
+  #pragma omp target ompx_attribute([[clang::amdgpu_flat_work_group_size(10, 20)]])
+  {}
+  #pragma omp target teams ompx_attribute(__attribute__((launch_bounds(45, 90))))
+  {}
+  #pragma omp target teams distribute parallel for simd ompx_attribute([[clang::amdgpu_flat_work_group_size(3, 17)]]) device(3) ompx_attribute(__attribute__((amdgpu_waves_per_eu(3, 7))))
+  for (int i = 0; i < 1000; ++i)
+  {}
+}
+
+// CHECK: attributes #0
+// CHECK-SAME: "amdgpu-flat-work-group-size"="10,20"
+// CHECK: attributes #4
+// CHECK-SAME: "amdgpu-flat-work-group-size"="3,17"
+// CHECK-SAME: "amdgpu-waves-per-eu"="3,7"
+
+// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"maxntidx", i32 45}
+// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"minctasm", i32 90}

diff  --git a/clang/test/OpenMP/ompx_attributes_messages.cpp b/clang/test/OpenMP/ompx_attributes_messages.cpp
new file mode 100644
index 00000000000000..c59c19027d26f8
--- /dev/null
+++ b/clang/test/OpenMP/ompx_attributes_messages.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -verify=expected -fopenmp -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized
+
+void bad() {
+  #pragma omp target data ompx_attribute() //  expected-error {{unexpected OpenMP clause 'ompx_attribute' in directive '#pragma omp target data'}}
+  #pragma omp target data ompx_attribute(__attribute__((launch_bounds(1, 2)))) //  expected-error {{unexpected OpenMP clause 'ompx_attribute' in directive '#pragma omp target data'}} expected-error {{expected at least one 'map', 'use_device_ptr', or 'use_device_addr' clause for '#pragma omp target data'}}
+
+  #pragma omp target ompx_attribute()
+  {}
+  #pragma omp target ompx_attribute(__attribute__(()))
+  {}
+  #pragma omp target ompx_attribute(__attribute__((pure))) //  expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'pure' is ignored}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((pure,amdgpu_waves_per_eu(1, 2), const))) //  expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'pure' is ignored}} expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'const' is ignored}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu()))) //  expected-error {{'amdgpu_waves_per_eu' attribute takes at least 1 argument}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu(1, 2, 3)))) //  expected-error {{'amdgpu_waves_per_eu' attribute takes no more than 2 arguments}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(1)))) //  expected-error {{'amdgpu_flat_work_group_size' attribute requires exactly 2 arguments}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(1, 2, 3,)))) //  expected-error {{expected expression}}
+  {}
+  #pragma omp target ompx_attribute([[clang::amdgpu_waves_per_eu(1, 2, 3)]]) //  expected-error {{'amdgpu_waves_per_eu' attribute takes no more than 2 arguments}}
+  {}
+  #pragma omp target ompx_attribute([[clang::unknown]]) //  expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'unknown' is ignored}}
+  {}
+  #pragma omp target ompx_attribute(baz) //  expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1))))
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(bad)))) //  expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, //  expected-error {{expected expression}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2 //  expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2) //  expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2)) //  expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2))) //  expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, -3)))) //  expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu(10, 1)))) //  expected-error {{'amdgpu_waves_per_eu' attribute argument is invalid: min must not be greater than max}}
+  {}
+}

diff  --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 39886b23bb36f1..1bdc0bf742a8ce 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2720,6 +2720,8 @@ void OMPClauseEnqueue::VisitOMPXDynCGroupMemClause(
 void OMPClauseEnqueue::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
   VisitOMPClauseList(C);
 }
+void OMPClauseEnqueue::VisitOMPXAttributeClause(const OMPXAttributeClause *C) {
+}
 
 } // namespace
 

diff  --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 7337102d40e8e7..b4b838a45f7eae 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -1962,6 +1962,7 @@ CHECK_SIMPLE_CLAUSE(Align, OMPC_align)
 CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare)
 CHECK_SIMPLE_CLAUSE(CancellationConstructType, OMPC_cancellation_construct_type)
 CHECK_SIMPLE_CLAUSE(Doacross, OMPC_doacross)
+CHECK_SIMPLE_CLAUSE(OmpxAttribute, OMPC_ompx_attribute)
 
 CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize)
 CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks)

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index c67b54acc47c0d..68f7eca4daffbf 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -448,6 +448,10 @@ def OMPC_Doacross : Clause<"doacross"> {
   let clangClass = "OMPDoacrossClause";
 }
 
+def OMPC_OMPX_Attribute : Clause<"ompx_attribute"> {
+  let clangClass = "OMPXAttributeClause";
+}
+
 //===----------------------------------------------------------------------===//
 // Definition of OpenMP directives
 //===----------------------------------------------------------------------===//
@@ -460,7 +464,8 @@ def OMP_Parallel : Directive<"parallel"> {
     VersionedClause<OMPC_Shared>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Copyin>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
@@ -645,7 +650,8 @@ def OMP_Target : Directive<"target"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_InReduction, 50>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
@@ -661,7 +667,8 @@ def OMP_Teams : Directive<"teams"> {
     VersionedClause<OMPC_FirstPrivate>,
     VersionedClause<OMPC_Shared>,
     VersionedClause<OMPC_Reduction>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
@@ -744,7 +751,8 @@ def OMP_TargetParallel : Directive<"target parallel"> {
     VersionedClause<OMPC_IsDevicePtr>,
     VersionedClause<OMPC_HasDeviceAddr, 51>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_DefaultMap>,
@@ -779,7 +787,8 @@ def OMP_TargetParallelFor : Directive<"target parallel for"> {
     VersionedClause<OMPC_HasDeviceAddr, 51>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -844,7 +853,8 @@ def OMP_ParallelFor : Directive<"parallel for"> {
     VersionedClause<OMPC_Ordered>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelDo : Directive<"parallel do"> {
@@ -889,7 +899,8 @@ def OMP_ParallelForSimd : Directive<"parallel for simd"> {
     VersionedClause<OMPC_Ordered>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelDoSimd : Directive<"parallel do simd"> {
@@ -929,7 +940,8 @@ def OMP_ParallelMaster : Directive<"parallel master"> {
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_ProcBind>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelMasked : Directive<"parallel masked"> {
@@ -944,7 +956,8 @@ def OMP_ParallelMasked : Directive<"parallel masked"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_ProcBind>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_Filter>
+    VersionedClause<OMPC_Filter>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelSections : Directive<"parallel sections"> {
@@ -958,7 +971,8 @@ def OMP_ParallelSections : Directive<"parallel sections"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_LastPrivate>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_NumThreads>
@@ -1127,7 +1141,8 @@ def OMP_DistributeParallelFor : Directive<"distribute parallel for"> {
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_Schedule>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_DistributeParallelDo : Directive<"distribute parallel do"> {
@@ -1174,7 +1189,8 @@ def OMP_DistributeParallelForSimd : Directive<"distribute parallel for simd"> {
     VersionedClause<OMPC_SimdLen>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_DistributeParallelDoSimd : Directive<"distribute parallel do simd"> {
@@ -1256,7 +1272,8 @@ def OMP_TargetParallelForSimd : Directive<"target parallel for simd"> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -1309,7 +1326,8 @@ def OMP_TargetSimd : Directive<"target simd"> {
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Collapse>,
@@ -1337,7 +1355,8 @@ def OMP_TeamsDistribute : Directive<"teams distribute"> {
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_DistSchedule>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> {
@@ -1350,7 +1369,8 @@ def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> {
     VersionedClause<OMPC_NonTemporal, 50>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
-    VersionedClause<OMPC_Shared>
+    VersionedClause<OMPC_Shared>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Collapse>,
@@ -1388,7 +1408,8 @@ def OMP_TeamsDistributeParallelForSimd :
     VersionedClause<OMPC_ThreadLimit>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_TeamsDistributeParallelDoSimd :
@@ -1438,7 +1459,8 @@ def OMP_TeamsDistributeParallelFor :
     VersionedClause<OMPC_ThreadLimit>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_TeamsDistributeParallelDo :
@@ -1479,7 +1501,8 @@ def OMP_TargetTeams : Directive<"target teams"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_UsesAllocators, 50>,
-    VersionedClause<OMPC_Shared>
+    VersionedClause<OMPC_Shared>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 
   let allowedOnceClauses = [
@@ -1505,7 +1528,8 @@ def OMP_TargetTeamsDistribute : Directive<"target teams distribute"> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_UsesAllocators, 50>,
     VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_LastPrivate>
+    VersionedClause<OMPC_LastPrivate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
@@ -1546,7 +1570,8 @@ def OMP_TargetTeamsDistributeParallelFor :
     VersionedClause<OMPC_Schedule>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -1617,7 +1642,8 @@ def OMP_TargetTeamsDistributeParallelForSimd :
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -1678,7 +1704,8 @@ def OMP_TargetTeamsDistributeSimd :
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
@@ -1773,7 +1800,8 @@ def OMP_ParallelMasterTaskloop :
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_ProcBind>,
-    VersionedClause<OMPC_Copyin>
+    VersionedClause<OMPC_Copyin>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelMaskedTaskloop :
@@ -1798,7 +1826,8 @@ def OMP_ParallelMaskedTaskloop :
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_ProcBind>,
     VersionedClause<OMPC_Copyin>,
-    VersionedClause<OMPC_Filter>
+    VersionedClause<OMPC_Filter>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_MasterTaskloopSimd : Directive<"master taskloop simd"> {
@@ -1883,7 +1912,8 @@ def OMP_ParallelMasterTaskloopSimd :
     VersionedClause<OMPC_SafeLen>,
     VersionedClause<OMPC_SimdLen>,
     VersionedClause<OMPC_NonTemporal, 50>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelMaskedTaskloopSimd :
@@ -1914,7 +1944,8 @@ def OMP_ParallelMaskedTaskloopSimd :
     VersionedClause<OMPC_SimdLen>,
     VersionedClause<OMPC_NonTemporal, 50>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_Filter>
+    VersionedClause<OMPC_Filter>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_Depobj : Directive<"depobj"> {
@@ -2021,6 +2052,7 @@ def OMP_teams_loop : Directive<"teams loop"> {
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Bind, 50>,
@@ -2045,7 +2077,8 @@ def OMP_target_teams_loop : Directive<"target teams loop"> {
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Bind, 50>,
@@ -2068,6 +2101,7 @@ def OMP_parallel_loop : Directive<"parallel loop"> {
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Bind, 50>,
@@ -2094,6 +2128,7 @@ def OMP_target_parallel_loop : Directive<"target parallel loop"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
     VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Bind, 50>,