[clang] ef9ec4b - [OpenMP] Add the `ompx_attribute` clause for target directives

Mon Jul 24 22:06:20 PDT 2023

Author: Johannes Doerfert
Date: 2023-07-24T22:04:45-07:00
New Revision: ef9ec4bbcca2fa4f64df47bc426f1d1c59ea47e2

URL: https://github.com/llvm/llvm-project/commit/ef9ec4bbcca2fa4f64df47bc426f1d1c59ea47e2
DIFF: https://github.com/llvm/llvm-project/commit/ef9ec4bbcca2fa4f64df47bc426f1d1c59ea47e2.diff

LOG: [OpenMP] Add the `ompx_attribute` clause for target directives

CUDA and HIP have kernel attributes to tune the code generation (in the
backend). To reuse this functionality for OpenMP target regions we
introduce the `ompx_attribute` clause that takes these kernel
attributes and emits code as if they had been attached to the kernel
fuction (which is implicitly generated).

To limit the impact, we only support three kernel attributes:
`amdgpu_waves_per_eu`, for AMDGPU
`amdgpu_flat_work_group_size`, for AMDGPU
`launch_bounds`, for NVPTX

The existing implementations of those attributes are used for error
checking and code generation. `ompx_attribute` can be attached to any
executable target region and it can hold more than one kernel attribute.

Differential Revision: https://reviews.llvm.org/D156184

Added: 
    clang/test/OpenMP/ompx_attributes_codegen.cpp
    clang/test/OpenMP/ompx_attributes_messages.cpp

Modified: 
    clang/include/clang/AST/OpenMPClause.h
    clang/include/clang/AST/RecursiveASTVisitor.h
    clang/include/clang/Basic/DiagnosticGroups.td
    clang/include/clang/Basic/DiagnosticParseKinds.td
    clang/include/clang/Parse/Parser.h
    clang/include/clang/Sema/Sema.h
    clang/lib/AST/OpenMPClause.cpp
    clang/lib/AST/StmtProfile.cpp
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/lib/CodeGen/CodeGenModule.h
    clang/lib/CodeGen/Targets/AMDGPU.cpp
    clang/lib/CodeGen/Targets/NVPTX.cpp
    clang/lib/Parse/ParseOpenMP.cpp
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/lib/Sema/SemaOpenMP.cpp
    clang/lib/Sema/TreeTransform.h
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTWriter.cpp
    clang/tools/libclang/CIndex.cpp
    llvm/include/llvm/Frontend/OpenMP/OMP.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 0bea21270692cf..31ae3d42e232fc 100644

--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -9172,6 +9172,54 @@ class OMPDoacrossClause final
   }
 };
 
+/// This represents 'ompx_attribute' clause in a directive that might generate
+/// an outlined function. An example is given below.
+///
+/// \code
+/// #pragma omp target [...] ompx_attribute(flatten)
+/// \endcode
+class OMPXAttributeClause
+    : public OMPNoChildClause<llvm::omp::OMPC_ompx_attribute> {
+  friend class OMPClauseReader;
+
+  /// Location of '('.
+  SourceLocation LParenLoc;
+
+  /// The parsed attributes (clause arguments)
+  SmallVector<const Attr *> Attrs;
+
+public:
+  /// Build 'ompx_attribute' clause.
+  ///
+  /// \param Attrs The parsed attributes (clause arguments)
+  /// \param StartLoc Starting location of the clause.
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc Ending location of the clause.
+  OMPXAttributeClause(ArrayRef<const Attr *> Attrs, SourceLocation StartLoc,
+                      SourceLocation LParenLoc, SourceLocation EndLoc)
+      : OMPNoChildClause(StartLoc, EndLoc), LParenLoc(LParenLoc), Attrs(Attrs) {
+  }
+
+  /// Build an empty clause.
+  OMPXAttributeClause() : OMPNoChildClause() {}
+
+  /// Sets the location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+  /// Returns the location of '('.
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+
+  /// Returned the attributes parsed from this clause.
+  ArrayRef<const Attr *> getAttrs() const { return Attrs; }
+
+private:
+  /// Replace the attributes with \p NewAttrs.
+  void setAttrs(ArrayRef<Attr *> NewAttrs) {
+    Attrs.clear();
+    Attrs.append(NewAttrs.begin(), NewAttrs.end());
+  }
+};
+
 } // namespace clang
 
 #endif // LLVM_CLANG_AST_OPENMPCLAUSE_H

diff  --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 604875cd6337a4..fc2d1ff708bf7a 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3875,6 +3875,12 @@ bool RecursiveASTVisitor<Derived>::VisitOMPDoacrossClause(
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPXAttributeClause(
+    OMPXAttributeClause *C) {
+  return true;
+}
+
 // FIXME: look at the following tricky-seeming exprs to see if we
 // need to recurse on anything.  These are ones that have methods
 // returning decls or qualtypes or nestednamespecifier -- though I'm

diff  --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 7b4d415bf06494..6a0a01e4a981a4 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1278,9 +1278,10 @@ def OpenMPMapping : DiagGroup<"openmp-mapping">;
 def OpenMPTarget : DiagGroup<"openmp-target", [OpenMPMapping]>;
 def OpenMPPre51Compat : DiagGroup<"pre-openmp-51-compat">;
 def OpenMP51Ext : DiagGroup<"openmp-51-extensions">;
+def OpenMPExtensions : DiagGroup<"openmp-extensions">;
 def OpenMP : DiagGroup<"openmp", [
     SourceUsesOpenMP, OpenMPClauses, OpenMPLoopForm, OpenMPTarget,
-    OpenMPMapping, OpenMP51Ext
+    OpenMPMapping, OpenMP51Ext, OpenMPExtensions
   ]>;
 
 // Backend warnings.

diff  --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 8d729c31641ed8..a804442ab34ec7 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1540,6 +1540,9 @@ def warn_omp_more_one_omp_all_memory : Warning<
   InGroup<OpenMPClauses>;
 def warn_omp_depend_in_ordered_deprecated : Warning<"'depend' clause for"
   " 'ordered' is deprecated; use 'doacross' instead">, InGroup<Deprecated>;
+def warn_omp_invalid_attribute_for_ompx_attributes : Warning<"'ompx_attribute' clause only allows "
+  "'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; "
+  "%0 is ignored">, InGroup<OpenMPExtensions>;
 
 // Pragma loop support.
 def err_pragma_loop_missing_argument : Error<

diff  --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 475dfe845528d9..b58041477c9ec9 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3490,6 +3490,13 @@ class Parser : public CodeCompletionHandler {
   //
   OMPClause *ParseOpenMPInteropClause(OpenMPClauseKind Kind, bool ParseOnly);
 
+  /// Parses a ompx_attribute clause
+  ///
+  /// \param ParseOnly true to skip the clause's semantic actions and return
+  /// nullptr.
+  //
+  OMPClause *ParseOpenMPOMPXAttributesClause(bool ParseOnly);
+
 public:
   /// Parses simple expression in parens for single-expression clauses of OpenMP
   /// constructs.

diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 3418a37b307785..7c641d5e273a03 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10988,6 +10988,11 @@ class Sema final {
   bool ConstantFoldAttrArgs(const AttributeCommonInfo &CI,
                             MutableArrayRef<Expr *> Args);
 
+  /// Create an CUDALaunchBoundsAttr attribute.
+  CUDALaunchBoundsAttr *CreateLaunchBoundsAttr(const AttributeCommonInfo &CI,
+                                               Expr *MaxThreads,
+                                               Expr *MinBlocks);
+
   /// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular
   /// declaration.
   void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
@@ -11004,11 +11009,21 @@ class Sema final {
   void AddXConsumedAttr(Decl *D, const AttributeCommonInfo &CI,
                         RetainOwnershipKind K, bool IsTemplateInstantiation);
 
+  /// Create an AMDGPUWavesPerEUAttr attribute.
+  AMDGPUFlatWorkGroupSizeAttr *
+  CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min,
+                                    Expr *Max);
+
   /// addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size
   /// attribute to a particular declaration.
   void addAMDGPUFlatWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI,
                                       Expr *Min, Expr *Max);
 
+  /// Create an AMDGPUWavesPerEUAttr attribute.
+  AMDGPUWavesPerEUAttr *
+  CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI, Expr *Min,
+                             Expr *Max);
+
   /// addAMDGPUWavePersEUAttr - Adds an amdgpu_waves_per_eu attribute to a
   /// particular declaration.
   void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
@@ -12341,6 +12356,12 @@ class Sema final {
                             ArrayRef<Expr *> VarList, SourceLocation StartLoc,
                             SourceLocation LParenLoc, SourceLocation EndLoc);
 
+  /// Called on a well-formed 'ompx_attribute' clause.
+  OMPClause *ActOnOpenMPXAttributeClause(ArrayRef<const Attr *> Attrs,
+                                         SourceLocation StartLoc,
+                                         SourceLocation LParenLoc,
+                                         SourceLocation EndLoc);
+
   /// The kind of conversion being performed.
   enum CheckedConversionKind {
     /// An implicit conversion.

diff  --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 4c895822ffdf85..f5ad75028a641e 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -2534,6 +2534,18 @@ void OMPClausePrinter::VisitOMPDoacrossClause(OMPDoacrossClause *Node) {
   OS << ")";
 }
 
+void OMPClausePrinter::VisitOMPXAttributeClause(OMPXAttributeClause *Node) {
+  OS << "ompx_attribute(";
+  bool IsFirst = true;
+  for (auto &Attr : Node->getAttrs()) {
+    if (!IsFirst)
+      OS << ", ";
+    Attr->printPretty(OS, Policy);
+    IsFirst = false;
+  }
+  OS << ")";
+}
+
 void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
                                          VariantMatchInfo &VMI) const {
   for (const OMPTraitSet &Set : Sets) {

diff  --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index d8a667b2d0fdc4..60646f7a0da57c 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -928,6 +928,8 @@ void OMPClauseProfiler::VisitOMPXDynCGroupMemClause(
 void OMPClauseProfiler::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
   VisitOMPClauseList(C);
 }
+void OMPClauseProfiler::VisitOMPXAttributeClause(const OMPXAttributeClause *C) {
+}
 } // namespace
 
 void

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a52ec8909b12ac..a4f7eb96c0d9bc 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6110,8 +6110,23 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
                                       DefaultValTeams, DefaultValThreads,
                                       IsOffloadEntry, OutlinedFn, OutlinedFnID);
 
-  if (OutlinedFn != nullptr)
-    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
+  if (!OutlinedFn)
+    return;
+
+  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
+
+  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
+    for (auto *A : C->getAttrs()) {
+      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
+        CGM.handleCUDALaunchBoundsAttr(OutlinedFn, Attr);
+      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
+        CGM.handleAMDGPUFlatWorkGroupSizeAttr(OutlinedFn, Attr);
+      else if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
+        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
+      else
+        llvm_unreachable("Unexpected attribute kind");
+    }
+  }
 }
 
 /// Checks if the expression is constant or does not have non-trivial function

diff  --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 05cb217e2bee4e..f5fd94492540f2 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1557,6 +1557,21 @@ class CodeGenModule : public CodeGenTypeCache {
   /// because we'll lose all important information after each repl.
   void moveLazyEmissionStates(CodeGenModule *NewBuilder);
 
+  /// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F.
+  void handleCUDALaunchBoundsAttr(llvm::Function *F,
+                                  const CUDALaunchBoundsAttr *A);
+
+  /// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute
+  /// to \p F. Alternatively, the work group size can be taken from a \p
+  /// ReqdWGS.
+  void handleAMDGPUFlatWorkGroupSizeAttr(
+      llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A,
+      const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr);
+
+  /// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F.
+  void handleAMDGPUWavesPerEUAttr(llvm::Function *F,
+                                  const AMDGPUWavesPerEUAttr *A);
+
 private:
   llvm::Constant *GetOrCreateLLVMFunction(
       StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,

diff  --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 796a2be81a09c7..bac7787643e33a 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -317,26 +317,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
 
   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
   if (ReqdWGS || FlatWGS) {
-    unsigned Min = 0;
-    unsigned Max = 0;
-    if (FlatWGS) {
-      Min = FlatWGS->getMin()
-                ->EvaluateKnownConstInt(M.getContext())
-                .getExtValue();
-      Max = FlatWGS->getMax()
-                ->EvaluateKnownConstInt(M.getContext())
-                .getExtValue();
-    }
-    if (ReqdWGS && Min == 0 && Max == 0)
-      Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
-
-    if (Min != 0) {
-      assert(Min <= Max && "Min must be less than or equal Max");
-
-      std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
-      F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
-    } else
-      assert(Max == 0 && "Max must be zero");
+    M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
   } else if (IsOpenCLKernel || IsHIPKernel) {
     // By default, restrict the maximum size to a value specified by
     // --gpu-max-threads-per-block=n or its default value for HIP.
@@ -349,24 +330,8 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
     F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
   }
 
-  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
-    unsigned Min =
-        Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
-    unsigned Max = Attr->getMax() ? Attr->getMax()
-                                        ->EvaluateKnownConstInt(M.getContext())
-                                        .getExtValue()
-                                  : 0;
-
-    if (Min != 0) {
-      assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
-
-      std::string AttrVal = llvm::utostr(Min);
-      if (Max != 0)
-        AttrVal = AttrVal + "," + llvm::utostr(Max);
-      F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
-    } else
-      assert(Max == 0 && "Max must be zero");
-  }
+  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
+    M.handleAMDGPUWavesPerEUAttr(F, Attr);
 
   if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
     unsigned NumSGPR = Attr->getNumSGPR();
@@ -595,6 +560,47 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
   return F;
 }
 
+void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
+    llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
+    const ReqdWorkGroupSizeAttr *ReqdWGS) {
+  unsigned Min = 0;
+  unsigned Max = 0;
+  if (FlatWGS) {
+    Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
+    Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue();
+  }
+  if (ReqdWGS && Min == 0 && Max == 0)
+    Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
+
+  if (Min != 0) {
+    assert(Min <= Max && "Min must be less than or equal Max");
+
+    std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
+    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
+  } else
+    assert(Max == 0 && "Max must be zero");
+}
+
+void CodeGenModule::handleAMDGPUWavesPerEUAttr(
+    llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) {
+  unsigned Min =
+      Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
+  unsigned Max =
+      Attr->getMax()
+          ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue()
+          : 0;
+
+  if (Min != 0) {
+    assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
+
+    std::string AttrVal = llvm::utostr(Min);
+    if (Max != 0)
+      AttrVal = AttrVal + "," + llvm::utostr(Max);
+    F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
+  } else
+    assert(Max == 0 && "Max must be zero");
+}
+
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) {
   return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes());

diff  --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 1ca0192333a0f8..0d4bbd79564800 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -71,12 +71,12 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
     return true;
   }
 
-private:
   // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
   // resulting MDNode to the nvvm.annotations MDNode.
   static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                               int Operand);
 
+private:
   static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                            LValue Src) {
     llvm::Value *Handle = nullptr;
@@ -256,24 +256,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
       // Create !{<func-ref>, metadata !"kernel", i32 1} node
       addNVVMMetadata(F, "kernel", 1);
     }
-    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
-      // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
-      llvm::APSInt MaxThreads(32);
-      MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
-      if (MaxThreads > 0)
-        addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
-
-      // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
-      // not specified in __launch_bounds__ or if the user specified a 0 value,
-      // we don't have to add a PTX directive.
-      if (Attr->getMinBlocks()) {
-        llvm::APSInt MinBlocks(32);
-        MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
-        if (MinBlocks > 0)
-          // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
-          addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
-      }
-    }
+    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
+      M.handleCUDALaunchBoundsAttr(F, Attr);
   }
 
   // Attach kernel metadata directly if compiling for NVPTX.
@@ -303,6 +287,28 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
 }
 }
 
+void CodeGenModule::handleCUDALaunchBoundsAttr(
+    llvm::Function *F, const CUDALaunchBoundsAttr *Attr) {
+  // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
+  llvm::APSInt MaxThreads(32);
+  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
+  if (MaxThreads > 0)
+    NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
+                                            MaxThreads.getExtValue());
+
+  // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
+  // not specified in __launch_bounds__ or if the user specified a 0 value,
+  // we don't have to add a PTX directive.
+  if (Attr->getMinBlocks()) {
+    llvm::APSInt MinBlocks(32);
+    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
+    if (MinBlocks > 0)
+      // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
+                                              MinBlocks.getExtValue());
+  }
+}
+
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
   return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());

diff  --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 96d2e2cede6289..66cabb19423348 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3411,6 +3411,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
           << getOpenMPClauseName(CKind) << getOpenMPDirectiveName(DKind);
     SkipUntil(tok::comma, tok::annot_pragma_openmp_end, StopBeforeMatch);
     break;
+  case OMPC_ompx_attribute:
+    Clause = ParseOpenMPOMPXAttributesClause(WrongDirective);
+    break;
   default:
     break;
   }
@@ -3691,6 +3694,62 @@ OMPClause *Parser::ParseOpenMPInteropClause(OpenMPClauseKind Kind,
   llvm_unreachable("Unexpected interop variable clause.");
 }
 
+OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) {
+  SourceLocation Loc = ConsumeToken();
+  // Parse '('.
+  BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+  if (T.expectAndConsume(diag::err_expected_lparen_after,
+                         getOpenMPClauseName(OMPC_ompx_attribute).data()))
+    return nullptr;
+
+  ParsedAttributes ParsedAttrs(AttrFactory);
+  ParseAttributes(PAKM_GNU | PAKM_CXX11, ParsedAttrs);
+
+  // Parse ')'.
+  if (T.consumeClose())
+    return nullptr;
+
+  if (ParseOnly)
+    return nullptr;
+
+  SmallVector<Attr *> Attrs;
+  for (const ParsedAttr &PA : ParsedAttrs) {
+    switch (PA.getKind()) {
+    case ParsedAttr::AT_AMDGPUFlatWorkGroupSize:
+      if (!PA.checkExactlyNumArgs(Actions, 2))
+        continue;
+      if (auto *A = Actions.CreateAMDGPUFlatWorkGroupSizeAttr(
+              PA, PA.getArgAsExpr(0), PA.getArgAsExpr(1)))
+        Attrs.push_back(A);
+      continue;
+    case ParsedAttr::AT_AMDGPUWavesPerEU:
+      if (!PA.checkAtLeastNumArgs(Actions, 1) ||
+          !PA.checkAtMostNumArgs(Actions, 2))
+        continue;
+      if (auto *A = Actions.CreateAMDGPUWavesPerEUAttr(
+              PA, PA.getArgAsExpr(0),
+              PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr))
+        Attrs.push_back(A);
+      continue;
+    case ParsedAttr::AT_CUDALaunchBounds:
+      if (!PA.checkAtLeastNumArgs(Actions, 1) ||
+          !PA.checkAtMostNumArgs(Actions, 2))
+        continue;
+      if (auto *A = Actions.CreateLaunchBoundsAttr(
+              PA, PA.getArgAsExpr(0),
+              PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr))
+        Attrs.push_back(A);
+      continue;
+    default:
+      Diag(Loc, diag::warn_omp_invalid_attribute_for_ompx_attributes) << PA;
+      continue;
+    };
+  }
+
+  return Actions.ActOnOpenMPXAttributeClause(Attrs, Loc, T.getOpenLocation(),
+                                             T.getCloseLocation());
+}
+
 /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'.
 ///
 ///    default-clause:

diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index ed69e802c95dd5..429fa12ff2e293 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5633,21 +5633,28 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E,
   return ValArg.getAs<Expr>();
 }
 
-void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
-                               Expr *MaxThreads, Expr *MinBlocks) {
+CUDALaunchBoundsAttr *
+Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
+                             Expr *MinBlocks) {
   CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks);
   MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0);
   if (MaxThreads == nullptr)
-    return;
+    return nullptr;
 
   if (MinBlocks) {
     MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1);
     if (MinBlocks == nullptr)
-      return;
+      return nullptr;
   }
 
-  D->addAttr(::new (Context)
-                 CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks));
+  return ::new (Context)
+      CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks);
+}
+
+void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
+                               Expr *MaxThreads, Expr *MinBlocks) {
+  if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks))
+    D->addAttr(Attr);
 }
 
 static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
@@ -7862,16 +7869,22 @@ checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr,
   return false;
 }
 
-void Sema::addAMDGPUFlatWorkGroupSizeAttr(Decl *D,
-                                          const AttributeCommonInfo &CI,
-                                          Expr *MinExpr, Expr *MaxExpr) {
+AMDGPUFlatWorkGroupSizeAttr *
+Sema::CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI,
+                                        Expr *MinExpr, Expr *MaxExpr) {
   AMDGPUFlatWorkGroupSizeAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
 
   if (checkAMDGPUFlatWorkGroupSizeArguments(*this, MinExpr, MaxExpr, TmpAttr))
-    return;
+    return nullptr;
+  return ::new (Context)
+      AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr);
+}
 
-  D->addAttr(::new (Context)
-                 AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr));
+void Sema::addAMDGPUFlatWorkGroupSizeAttr(Decl *D,
+                                          const AttributeCommonInfo &CI,
+                                          Expr *MinExpr, Expr *MaxExpr) {
+  if (auto *Attr = CreateAMDGPUFlatWorkGroupSizeAttr(CI, MinExpr, MaxExpr))
+    D->addAttr(Attr);
 }
 
 static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D,
@@ -7916,15 +7929,21 @@ static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr,
   return false;
 }
 
-void Sema::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
-                                   Expr *MinExpr, Expr *MaxExpr) {
+AMDGPUWavesPerEUAttr *
+Sema::CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI, Expr *MinExpr,
+                                 Expr *MaxExpr) {
   AMDGPUWavesPerEUAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
 
   if (checkAMDGPUWavesPerEUArguments(*this, MinExpr, MaxExpr, TmpAttr))
-    return;
+    return nullptr;
+
+  return ::new (Context) AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr);
+}
 
-  D->addAttr(::new (Context)
-                 AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr));
+void Sema::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
+                                   Expr *MinExpr, Expr *MaxExpr) {
+  if (auto *Attr = CreateAMDGPUWavesPerEUAttr(CI, MinExpr, MaxExpr))
+    D->addAttr(Attr);
 }
 
 static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D, const ParsedAttr &AL) {

diff  --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index cf805987b378e3..3954bf2ad28041 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -30,6 +30,7 @@
 #include "clang/Sema/EnterExpressionEvaluationContext.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaInternal.h"
@@ -24093,3 +24094,10 @@ OMPClause *Sema::ActOnOpenMPDoacrossClause(
     DSAStack->addDoacrossDependClause(C, OpsOffs);
   return C;
 }
+
+OMPClause *Sema::ActOnOpenMPXAttributeClause(ArrayRef<const Attr *> Attrs,
+                                             SourceLocation StartLoc,
+                                             SourceLocation LParenLoc,
+                                             SourceLocation EndLoc) {
+  return new (Context) OMPXAttributeClause(Attrs, StartLoc, LParenLoc, EndLoc);
+}

diff  --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 10b3587885e39f..a73b54b668a415 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -2377,6 +2377,18 @@ class TreeTransform {
                                                     EndLoc);
   }
 
+  /// Build a new OpenMP 'ompx_attribute' clause.
+  ///
+  /// By default, performs semantic analysis to build the new OpenMP clause.
+  /// Subclasses may override this routine to provide 
diff erent behavior.
+  OMPClause *RebuildOMPXAttributeClause(ArrayRef<const Attr *> Attrs,
+                                        SourceLocation StartLoc,
+                                        SourceLocation LParenLoc,
+                                        SourceLocation EndLoc) {
+    return getSema().ActOnOpenMPXAttributeClause(Attrs, StartLoc, LParenLoc,
+                                                 EndLoc);
+  }
+
   /// Build a new OpenMP 'align' clause.
   ///
   /// By default, performs semantic analysis to build the new OpenMP clause.
@@ -10756,6 +10768,16 @@ TreeTransform<Derived>::TransformOMPDoacrossClause(OMPDoacrossClause *C) {
       C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
 }
 
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPXAttributeClause(OMPXAttributeClause *C) {
+  SmallVector<const Attr *> NewAttrs;
+  for (auto *A : C->getAttrs())
+    NewAttrs.push_back(getDerived().TransformAttr(A));
+  return getDerived().RebuildOMPXAttributeClause(
+      NewAttrs, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
+}
+
 //===----------------------------------------------------------------------===//
 // Expression transformation
 //===----------------------------------------------------------------------===//

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 5f756961c6e1d0..dcb845dd551e8d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -10370,6 +10370,9 @@ OMPClause *OMPClauseReader::readClause() {
     C = OMPDoacrossClause::CreateEmpty(Context, NumVars, NumLoops);
     break;
   }
+  case llvm::omp::OMPC_ompx_attribute:
+    C = new (Context) OMPXAttributeClause();
+    break;
 #define OMP_CLAUSE_NO_CLASS(Enum, Str)                                         \
   case llvm::omp::Enum:                                                        \
     break;
@@ -11462,6 +11465,15 @@ void OMPClauseReader::VisitOMPDoacrossClause(OMPDoacrossClause *C) {
     C->setLoopData(I, Record.readSubExpr());
 }
 
+void OMPClauseReader::VisitOMPXAttributeClause(OMPXAttributeClause *C) {
+  AttrVec Attrs;
+  Record.readAttributes(Attrs);
+  C->setAttrs(Attrs);
+  C->setLocStart(Record.readSourceLocation());
+  C->setLParenLoc(Record.readSourceLocation());
+  C->setLocEnd(Record.readSourceLocation());
+}
+
 OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() {
   OMPTraitInfo &TI = getContext().getNewOMPTraitInfo();
   TI.Sets.resize(readUInt32());

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 26279d399b53a9..e238ad3d186fba 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7171,6 +7171,13 @@ void OMPClauseWriter::VisitOMPDoacrossClause(OMPDoacrossClause *C) {
     Record.AddStmt(C->getLoopData(I));
 }
 
+void OMPClauseWriter::VisitOMPXAttributeClause(OMPXAttributeClause *C) {
+  Record.AddAttributes(C->getAttrs());
+  Record.AddSourceLocation(C->getBeginLoc());
+  Record.AddSourceLocation(C->getLParenLoc());
+  Record.AddSourceLocation(C->getEndLoc());
+}
+
 void ASTRecordWriter::writeOMPTraitInfo(const OMPTraitInfo *TI) {
   writeUInt32(TI->Sets.size());
   for (const auto &Set : TI->Sets) {

diff  --git a/clang/test/OpenMP/ompx_attributes_codegen.cpp b/clang/test/OpenMP/ompx_attributes_codegen.cpp
new file mode 100644
index 00000000000000..21e9805cbe8293
--- /dev/null
+++ b/clang/test/OpenMP/ompx_attributes_codegen.cpp
@@ -0,0 +1,31 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+
+// Check that the target attributes are set on the generated kernel
+void func() {
+  // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l15() #0
+  // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l17()
+  // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l19() #4
+
+  #pragma omp target ompx_attribute([[clang::amdgpu_flat_work_group_size(10, 20)]])
+  {}
+  #pragma omp target teams ompx_attribute(__attribute__((launch_bounds(45, 90))))
+  {}
+  #pragma omp target teams distribute parallel for simd ompx_attribute([[clang::amdgpu_flat_work_group_size(3, 17)]]) device(3) ompx_attribute(__attribute__((amdgpu_waves_per_eu(3, 7))))
+  for (int i = 0; i < 1000; ++i)
+  {}
+}
+
+// CHECK: attributes #0
+// CHECK-SAME: "amdgpu-flat-work-group-size"="10,20"
+// CHECK: attributes #4
+// CHECK-SAME: "amdgpu-flat-work-group-size"="3,17"
+// CHECK-SAME: "amdgpu-waves-per-eu"="3,7"
+
+// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"maxntidx", i32 45}
+// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"minctasm", i32 90}

diff  --git a/clang/test/OpenMP/ompx_attributes_messages.cpp b/clang/test/OpenMP/ompx_attributes_messages.cpp
new file mode 100644
index 00000000000000..c59c19027d26f8
--- /dev/null
+++ b/clang/test/OpenMP/ompx_attributes_messages.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -verify=expected -fopenmp -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized
+
+void bad() {
+  #pragma omp target data ompx_attribute() //  expected-error {{unexpected OpenMP clause 'ompx_attribute' in directive '#pragma omp target data'}}
+  #pragma omp target data ompx_attribute(__attribute__((launch_bounds(1, 2)))) //  expected-error {{unexpected OpenMP clause 'ompx_attribute' in directive '#pragma omp target data'}} expected-error {{expected at least one 'map', 'use_device_ptr', or 'use_device_addr' clause for '#pragma omp target data'}}
+
+  #pragma omp target ompx_attribute()
+  {}
+  #pragma omp target ompx_attribute(__attribute__(()))
+  {}
+  #pragma omp target ompx_attribute(__attribute__((pure))) //  expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'pure' is ignored}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((pure,amdgpu_waves_per_eu(1, 2), const))) //  expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'pure' is ignored}} expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'const' is ignored}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu()))) //  expected-error {{'amdgpu_waves_per_eu' attribute takes at least 1 argument}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu(1, 2, 3)))) //  expected-error {{'amdgpu_waves_per_eu' attribute takes no more than 2 arguments}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(1)))) //  expected-error {{'amdgpu_flat_work_group_size' attribute requires exactly 2 arguments}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(1, 2, 3,)))) //  expected-error {{expected expression}}
+  {}
+  #pragma omp target ompx_attribute([[clang::amdgpu_waves_per_eu(1, 2, 3)]]) //  expected-error {{'amdgpu_waves_per_eu' attribute takes no more than 2 arguments}}
+  {}
+  #pragma omp target ompx_attribute([[clang::unknown]]) //  expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'unknown' is ignored}}
+  {}
+  #pragma omp target ompx_attribute(baz) //  expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1))))
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(bad)))) //  expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, //  expected-error {{expected expression}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2 //  expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2) //  expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2)) //  expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2))) //  expected-error {{expected ')'}} expected-note {{to match this '('}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, -3)))) //  expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
+  {}
+  #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu(10, 1)))) //  expected-error {{'amdgpu_waves_per_eu' attribute argument is invalid: min must not be greater than max}}
+  {}
+}

diff  --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 0c629fe336fad8..120ad4ea39e1a9 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2720,6 +2720,8 @@ void OMPClauseEnqueue::VisitOMPXDynCGroupMemClause(
 void OMPClauseEnqueue::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
   VisitOMPClauseList(C);
 }
+void OMPClauseEnqueue::VisitOMPXAttributeClause(const OMPXAttributeClause *C) {
+}
 
 } // namespace
 

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index c67b54acc47c0d..68f7eca4daffbf 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -448,6 +448,10 @@ def OMPC_Doacross : Clause<"doacross"> {
   let clangClass = "OMPDoacrossClause";
 }
 
+def OMPC_OMPX_Attribute : Clause<"ompx_attribute"> {
+  let clangClass = "OMPXAttributeClause";
+}
+
 //===----------------------------------------------------------------------===//
 // Definition of OpenMP directives
 //===----------------------------------------------------------------------===//
@@ -460,7 +464,8 @@ def OMP_Parallel : Directive<"parallel"> {
     VersionedClause<OMPC_Shared>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Copyin>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
@@ -645,7 +650,8 @@ def OMP_Target : Directive<"target"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_InReduction, 50>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
@@ -661,7 +667,8 @@ def OMP_Teams : Directive<"teams"> {
     VersionedClause<OMPC_FirstPrivate>,
     VersionedClause<OMPC_Shared>,
     VersionedClause<OMPC_Reduction>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
@@ -744,7 +751,8 @@ def OMP_TargetParallel : Directive<"target parallel"> {
     VersionedClause<OMPC_IsDevicePtr>,
     VersionedClause<OMPC_HasDeviceAddr, 51>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_DefaultMap>,
@@ -779,7 +787,8 @@ def OMP_TargetParallelFor : Directive<"target parallel for"> {
     VersionedClause<OMPC_HasDeviceAddr, 51>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -844,7 +853,8 @@ def OMP_ParallelFor : Directive<"parallel for"> {
     VersionedClause<OMPC_Ordered>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelDo : Directive<"parallel do"> {
@@ -889,7 +899,8 @@ def OMP_ParallelForSimd : Directive<"parallel for simd"> {
     VersionedClause<OMPC_Ordered>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelDoSimd : Directive<"parallel do simd"> {
@@ -929,7 +940,8 @@ def OMP_ParallelMaster : Directive<"parallel master"> {
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_ProcBind>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelMasked : Directive<"parallel masked"> {
@@ -944,7 +956,8 @@ def OMP_ParallelMasked : Directive<"parallel masked"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_ProcBind>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_Filter>
+    VersionedClause<OMPC_Filter>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelSections : Directive<"parallel sections"> {
@@ -958,7 +971,8 @@ def OMP_ParallelSections : Directive<"parallel sections"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_LastPrivate>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_NumThreads>
@@ -1127,7 +1141,8 @@ def OMP_DistributeParallelFor : Directive<"distribute parallel for"> {
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_Schedule>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_DistributeParallelDo : Directive<"distribute parallel do"> {
@@ -1174,7 +1189,8 @@ def OMP_DistributeParallelForSimd : Directive<"distribute parallel for simd"> {
     VersionedClause<OMPC_SimdLen>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_DistributeParallelDoSimd : Directive<"distribute parallel do simd"> {
@@ -1256,7 +1272,8 @@ def OMP_TargetParallelForSimd : Directive<"target parallel for simd"> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -1309,7 +1326,8 @@ def OMP_TargetSimd : Directive<"target simd"> {
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Collapse>,
@@ -1337,7 +1355,8 @@ def OMP_TeamsDistribute : Directive<"teams distribute"> {
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_DistSchedule>,
-    VersionedClause<OMPC_Allocate>
+    VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> {
@@ -1350,7 +1369,8 @@ def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> {
     VersionedClause<OMPC_NonTemporal, 50>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
-    VersionedClause<OMPC_Shared>
+    VersionedClause<OMPC_Shared>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Collapse>,
@@ -1388,7 +1408,8 @@ def OMP_TeamsDistributeParallelForSimd :
     VersionedClause<OMPC_ThreadLimit>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_TeamsDistributeParallelDoSimd :
@@ -1438,7 +1459,8 @@ def OMP_TeamsDistributeParallelFor :
     VersionedClause<OMPC_ThreadLimit>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_TeamsDistributeParallelDo :
@@ -1479,7 +1501,8 @@ def OMP_TargetTeams : Directive<"target teams"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_UsesAllocators, 50>,
-    VersionedClause<OMPC_Shared>
+    VersionedClause<OMPC_Shared>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 
   let allowedOnceClauses = [
@@ -1505,7 +1528,8 @@ def OMP_TargetTeamsDistribute : Directive<"target teams distribute"> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_UsesAllocators, 50>,
     VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_LastPrivate>
+    VersionedClause<OMPC_LastPrivate>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
@@ -1546,7 +1570,8 @@ def OMP_TargetTeamsDistributeParallelFor :
     VersionedClause<OMPC_Schedule>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -1617,7 +1642,8 @@ def OMP_TargetTeamsDistributeParallelForSimd :
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NonTemporal, 50>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -1678,7 +1704,8 @@ def OMP_TargetTeamsDistributeSimd :
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
@@ -1773,7 +1800,8 @@ def OMP_ParallelMasterTaskloop :
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_ProcBind>,
-    VersionedClause<OMPC_Copyin>
+    VersionedClause<OMPC_Copyin>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelMaskedTaskloop :
@@ -1798,7 +1826,8 @@ def OMP_ParallelMaskedTaskloop :
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_ProcBind>,
     VersionedClause<OMPC_Copyin>,
-    VersionedClause<OMPC_Filter>
+    VersionedClause<OMPC_Filter>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_MasterTaskloopSimd : Directive<"master taskloop simd"> {
@@ -1883,7 +1912,8 @@ def OMP_ParallelMasterTaskloopSimd :
     VersionedClause<OMPC_SafeLen>,
     VersionedClause<OMPC_SimdLen>,
     VersionedClause<OMPC_NonTemporal, 50>,
-    VersionedClause<OMPC_Order, 50>
+    VersionedClause<OMPC_Order, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_ParallelMaskedTaskloopSimd :
@@ -1914,7 +1944,8 @@ def OMP_ParallelMaskedTaskloopSimd :
     VersionedClause<OMPC_SimdLen>,
     VersionedClause<OMPC_NonTemporal, 50>,
     VersionedClause<OMPC_Order, 50>,
-    VersionedClause<OMPC_Filter>
+    VersionedClause<OMPC_Filter>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
 }
 def OMP_Depobj : Directive<"depobj"> {
@@ -2021,6 +2052,7 @@ def OMP_teams_loop : Directive<"teams loop"> {
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Bind, 50>,
@@ -2045,7 +2077,8 @@ def OMP_target_teams_loop : Directive<"target teams loop"> {
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_UsesAllocators, 50>
+    VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Bind, 50>,
@@ -2068,6 +2101,7 @@ def OMP_parallel_loop : Directive<"parallel loop"> {
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Bind, 50>,
@@ -2094,6 +2128,7 @@ def OMP_target_parallel_loop : Directive<"target parallel loop"> {
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
     VersionedClause<OMPC_UsesAllocators, 50>,
+    VersionedClause<OMPC_OMPX_Attribute>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Bind, 50>,