[clang] [llvm] [OpenMP] OpenMP 5.1 "assume" directive parsing support (PR #92731)

Julian Brown via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 19 09:07:56 PDT 2024


https://github.com/jtb20 updated https://github.com/llvm/llvm-project/pull/92731

>From d83105c32a48d73fe547523841a115d6863f7799 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown at amd.com>
Date: Wed, 1 May 2024 06:35:59 -0500
Subject: [PATCH 1/2] [OpenMP] OpenMP 5.1 "assume" directive parsing support

This is a minimal patch to support parsing for "omp assume" directives.
These are meant to be hints to a compiler's optimisers: as such, it is
legitimate (if not very useful) to ignore them.  The patch builds on top
of the existing support for "omp assumes" directives (note spelling!).

Unlike the "omp [begin/end] assumes" directives, "omp assume" is
associated with a compound statement, i.e. it can appear within a
function.  The "holds" assumption could (theoretically) be mapped onto
the existing builtin "__builtin_assume", though the latter applies to a
single point in the program, and the former to a range (i.e. the whole
of the associated compound statement).

This patch fixes sollve's OpenMP 5.1 "omp assume"-based tests.

Change-Id: Ibd4a0e2af82c4ac818eaa3de8867a006307361ec
---
 clang/lib/Parse/ParseOpenMP.cpp          | 22 +++++++++++++
 clang/lib/Sema/SemaOpenMP.cpp            |  3 +-
 clang/test/OpenMP/assume_lambda.cpp      | 31 +++++++++++++++++
 clang/test/OpenMP/assume_messages.c      | 23 +++++++++++++
 clang/test/OpenMP/assume_messages_attr.c | 23 +++++++++++++
 clang/test/OpenMP/assume_template.cpp    | 42 ++++++++++++++++++++++++
 llvm/include/llvm/Frontend/OpenMP/OMP.td |  4 +++
 7 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/assume_lambda.cpp
 create mode 100644 clang/test/OpenMP/assume_messages.c
 create mode 100644 clang/test/OpenMP/assume_messages_attr.c
 create mode 100644 clang/test/OpenMP/assume_template.cpp

diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 50a872fedebf7..513af9846aa7e 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2444,6 +2444,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
   case OMPD_target_teams_loop:
   case OMPD_parallel_loop:
   case OMPD_target_parallel_loop:
+  case OMPD_assume:
     Diag(Tok, diag::err_omp_unexpected_directive)
         << 1 << getOpenMPDirectiveName(DKind);
     break;
@@ -3023,6 +3024,27 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
         << 1 << getOpenMPDirectiveName(DKind);
     SkipUntil(tok::annot_pragma_openmp_end);
     break;
+  case OMPD_assume: {
+    ParseScope OMPDirectiveScope(this, Scope::FnScope | Scope::DeclScope |
+                                 Scope::CompoundStmtScope);
+    ParseOpenMPAssumesDirective(DKind, ConsumeToken());
+
+    SkipUntil(tok::annot_pragma_openmp_end);
+
+    ParsingOpenMPDirectiveRAII NormalScope(*this);
+    StmtResult AssociatedStmt;
+    {
+      Sema::CompoundScopeRAII Scope(Actions);
+      AssociatedStmt = ParseStatement();
+      EndLoc = Tok.getLocation();
+      Directive = Actions.ActOnCompoundStmt(Loc, EndLoc,
+                                            AssociatedStmt.get(),
+                                            /*isStmtExpr=*/false);
+    }
+    ParseOpenMPEndAssumesDirective(Loc);
+    OMPDirectiveScope.Exit();
+    break;
+  }
   case OMPD_unknown:
   default:
     Diag(Tok, diag::err_omp_unknown_directive);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 5c759aedf9798..731d839caef03 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -3512,7 +3512,8 @@ void SemaOpenMP::ActOnOpenMPAssumesDirective(SourceLocation Loc,
 
   auto *AA =
       OMPAssumeAttr::Create(getASTContext(), llvm::join(Assumptions, ","), Loc);
-  if (DKind == llvm::omp::Directive::OMPD_begin_assumes) {
+  if (DKind == llvm::omp::Directive::OMPD_begin_assumes ||
+      DKind == llvm::omp::Directive::OMPD_assume) {
     OMPAssumeScoped.push_back(AA);
     return;
   }
diff --git a/clang/test/OpenMP/assume_lambda.cpp b/clang/test/OpenMP/assume_lambda.cpp
new file mode 100644
index 0000000000000..a38380ed4482a
--- /dev/null
+++ b/clang/test/OpenMP/assume_lambda.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -ast-print %s | FileCheck %s
+// expected-no-diagnostics
+
+extern int bar(int);
+
+int foo(int arg)
+{
+  #pragma omp assume no_openmp_routines
+  {
+    auto fn = [](int x) { return bar(x); };
+// CHECK: auto fn = [](int x) {
+    return fn(5);
+  }
+}
+
+class C {
+public:
+  int foo(int a);
+};
+
+// We're really just checking that this parses.  All the assumptions are thrown
+// away immediately for now.
+int C::foo(int a)
+{
+  #pragma omp assume holds(sizeof(T) == 8) absent(parallel)
+  {
+    auto fn = [](int x) { return bar(x); };
+// CHECK: auto fn = [](int x) {
+    return fn(5);
+  }
+}
\ No newline at end of file
diff --git a/clang/test/OpenMP/assume_messages.c b/clang/test/OpenMP/assume_messages.c
new file mode 100644
index 0000000000000..33c1c6f7c51e7
--- /dev/null
+++ b/clang/test/OpenMP/assume_messages.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -verify -fopenmp -x c -std=c99 %s
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -verify -fopenmp-simd -x c -std=c99 %s
+
+#pragma omp assume no_openmp // expected-error {{unexpected OpenMP directive '#pragma omp assume'}}
+
+void foo(void) {
+  #pragma omp assume hold(1==1) // expected-warning {{valid assume clauses start with 'ext_', 'absent', 'contains', 'holds', 'no_openmp', 'no_openmp_routines', 'no_parallelism'; tokens will be ignored}} expected-note {{the ignored tokens spans until here}}
+  {}
+}
+
+void bar(void) {
+  #pragma omp assume absent(target)
+} // expected-error {{expected statement}}
+
+void qux(void) {
+  #pragma omp assume extra_bits // expected-warning {{valid assume clauses start with 'ext_', 'absent', 'contains', 'holds', 'no_openmp', 'no_openmp_routines', 'no_parallelism'; token will be ignored}}
+  {}
+}
+
+void quux(void) {
+  #pragma omp assume ext_spelled_properly
+  {}
+}
diff --git a/clang/test/OpenMP/assume_messages_attr.c b/clang/test/OpenMP/assume_messages_attr.c
new file mode 100644
index 0000000000000..47504cc6308ea
--- /dev/null
+++ b/clang/test/OpenMP/assume_messages_attr.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -verify -fopenmp -x c -std=c99 %s
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -verify -fopenmp-simd -x c -std=c99 %s
+
+[[omp::directive(assume no_openmp)]] // expected-error {{unexpected OpenMP directive '#pragma omp assume'}}
+
+void foo(void) {
+  [[omp::directive(assume hold(1==1))]] // expected-warning {{valid assume clauses start with 'ext_', 'absent', 'contains', 'holds', 'no_openmp', 'no_openmp_routines', 'no_parallelism'; tokens will be ignored}} expected-note {{the ignored tokens spans until here}}
+  {}
+}
+
+void bar(void) {
+  [[omp::directive(assume absent(target))]]
+} // expected-error {{expected statement}}
+
+void qux(void) {
+  [[omp::directive(assume extra_bits)]] // expected-warning {{valid assume clauses start with 'ext_', 'absent', 'contains', 'holds', 'no_openmp', 'no_openmp_routines', 'no_parallelism'; token will be ignored}}
+  {}
+}
+
+void quux(void) {
+  [[omp::directive(assume ext_spelled_properly)]]
+  {}
+}
diff --git a/clang/test/OpenMP/assume_template.cpp b/clang/test/OpenMP/assume_template.cpp
new file mode 100644
index 0000000000000..b0591bffb20a6
--- /dev/null
+++ b/clang/test/OpenMP/assume_template.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+extern int qux(int);
+
+template<typename T>
+int foo(T arg)
+{
+  #pragma omp assume no_openmp_routines
+  {
+    auto fn = [](int x) { return qux(x); };
+// CHECK: auto fn = [](int x) {
+    return fn(5);
+  }
+}
+
+template<typename T>
+class C {
+  T m;
+
+public:
+  T bar(T a);
+};
+
+// We're really just checking this parses.  All the assumptions are thrown
+// away immediately for now.
+template<typename T>
+T C<T>::bar(T a)
+{
+  #pragma omp assume holds(sizeof(T) == 8) absent(parallel)
+  {
+    return (T)qux((int)a);
+// CHECK: return (T)qux((int)a);
+  }
+}
+
+#endif
\ No newline at end of file
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 12a944e34c414..bcfed1ea50f94 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -510,6 +510,10 @@ def OMP_EndAssumes : Directive<"end assumes"> {
   let association = AS_Delimited;
   let category = OMP_Assumes.category;
 }
+def OMP_Assume : Directive<"assume"> {
+  let association = AS_Block;
+  let category = CA_Informational;
+}
 def OMP_Atomic : Directive<"atomic"> {
   let allowedClauses = [
     VersionedClause<OMPC_Capture>,

>From 49870fb6bc6267b42b2deb52c81a29a1118247e2 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown at amd.com>
Date: Wed, 12 Jun 2024 13:58:22 -0500
Subject: [PATCH 2/2] [OpenMP] Diagnostic check for imperfect loop collapse

This patch adds a diagnostic which attempts to detect the case where the
"collapse" clause is used with imperfectly-nested parallel loops,
something like this:

  #pragma omp target
  #pragma omp parallel for collapse(2)
    for (int i = 0; i < N; i++) {
      arr[i][i] = ...;
      for (int j = 0; j < N; j++) {
        arr[i][j] = ...;
      }
    }

This kind of nesting is permitted by OpenMP 5+.

At a glance, this appears fine: the outer loop iterations are
independent, so can be executed in parallel, and the inner loop
iterations are also independent and can be executed in parallel.

However, the "collapse" clause works by essentially moving the
not-perfectly-nested statements into the innermost loop.  This is
sometimes harmless, if inefficient (the statement gets executed more times
than a naive user might expect), but in this case the combined/collapsed
loop iterations now have a data dependency between them:

  for (int ij = 0; ij < N*N; ij++) {
    int i = ij / N, j = ij % N;
    arr[i][i] = ...; // all of these...
    arr[i][j] = ...; // ...would have to be executed before all of these
  }

...and that means the result is (silently!) incorrect.

Since this seems like an easy mistake to make, I was interested to find
out if there was a feasible and reasonably accurate way to try to
diagnose it.  This is what I came up with.

Firstly, in Clang, memory load/store instructions emitted from
statements in the "imperfect" parts of loop nests are annotated with
a new annotation, "llvm.omp.loop.imperfection".  Then in LLVM proper,
in the OpenMPOpt pass (because I couldn't find anywhere that looked more
appropriate), memory load/store instructions in collapsed loops are
partitioned into two groups, with or without the annotation.  Then if
any of the first group may/must alias with any in the second group,
a warning (actually a "remark") is emitted.

The remark is opt-in.  The user must compile with
"-Rpass-analysis=openmp-opt" to trigger it.  That seems appropriate,
because the diagnostic potentially has a false-positive rate that is too
high for a regular warning, but on the other hand users aren't likely to
benefit from the true-positive warning unless they know to use the option.
Comments welcome.

FWIW, I don't think there's a reasonable, safe way to collapse loops
like this and maintain parallel semantics, but ICBW.
---
 clang/lib/CodeGen/CGStmtOpenMP.cpp           |  7 ++-
 clang/lib/CodeGen/CodeGenFunction.cpp        | 14 ++++-
 clang/lib/CodeGen/CodeGenFunction.h          | 22 +++++++
 clang/test/OpenMP/for_collapse_imperfect.cpp | 65 ++++++++++++++++++++
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp        | 56 +++++++++++++++++
 5 files changed, 162 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/OpenMP/for_collapse_imperfect.cpp

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..4a210bbea734c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1898,7 +1898,12 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
       return;
     }
   }
-  CGF.EmitStmt(S);
+  if (SimplifiedS != NextLoop) {
+    CodeGenFunction::OMPLoopImperfectionRAII OLI(CGF);
+    CGF.EmitStmt(S);
+  } else {
+    CGF.EmitStmt(S);
+  }
 }
 
 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index cea0d84c64bc4..6aa4ede0e4345 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -44,6 +44,8 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Support/CRC.h"
 #include "llvm/Support/xxhash.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
@@ -2648,8 +2650,18 @@ void CGBuilderInserter::InsertHelper(
     llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB,
     llvm::BasicBlock::iterator InsertPt) const {
   llvm::IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt);
-  if (CGF)
+  if (CGF) {
     CGF->InsertHelper(I, Name, BB, InsertPt);
+    if (CGF->GetOMPLoopImperfection() &&
+        I->mayReadOrWriteMemory()) {
+      llvm::LLVMContext &Ctx = CGF->getLLVMContext();
+      llvm::MDNode *Imp = llvm::MDNode::get(Ctx,
+                                            llvm::ConstantAsMetadata::get(
+                                              llvm::ConstantInt::get(
+                                              llvm::Type::getInt1Ty(Ctx), 1)));
+      I->setMetadata("llvm.omp.loop.imperfection", Imp);
+    }
+  }
 }
 
 // Emits an error if we don't have a valid set of target features for the
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 8525f66082a4e..fd51c22504456 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3634,6 +3634,22 @@ class CodeGenFunction : public CodeGenTypeCache {
     ~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); }
   };
 
+  /// Controls emission of "llvm.omp.loop.imperfection" metadata on
+  /// load/store instructions.
+  class OMPLoopImperfectionRAII {
+    CodeGenFunction &CGF;
+    bool OldValue;
+
+  public:
+    OMPLoopImperfectionRAII(CodeGenFunction &CGF) : CGF(CGF) {
+      OldValue = CGF.GetOMPLoopImperfection();
+      CGF.SetOMPLoopImperfection (true);
+    }
+    ~OMPLoopImperfectionRAII() {
+      CGF.SetOMPLoopImperfection (OldValue);
+    }
+  };
+
   /// Returns calculated size of the specified type.
   llvm::Value *getTypeSize(QualType Ty);
   LValue InitCapturedStruct(const CapturedStmt &S);
@@ -3992,6 +4008,9 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Emits the lvalue for the expression with possibly captured variable.
   LValue EmitOMPSharedLValue(const Expr *E);
 
+  bool GetOMPLoopImperfection() { return OMPLoopImperfection; }
+  void SetOMPLoopImperfection(bool I) { OMPLoopImperfection = I; }
+
 private:
   /// Helpers for blocks.
   llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
@@ -4032,6 +4051,9 @@ class CodeGenFunction : public CodeGenTypeCache {
           IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB),
           NextUB(NextUB) {}
   };
+
+  bool OMPLoopImperfection = false;
+
   void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
                         const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
                         const OMPLoopArguments &LoopArgs,
diff --git a/clang/test/OpenMP/for_collapse_imperfect.cpp b/clang/test/OpenMP/for_collapse_imperfect.cpp
new file mode 100644
index 0000000000000..4da6e3dbdbb0a
--- /dev/null
+++ b/clang/test/OpenMP/for_collapse_imperfect.cpp
@@ -0,0 +1,65 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -verify=host -O2 -triple x86_64-unknown-unknown -Rpass-analysis=openmp-opt -fopenmp -x c++ -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86_64-host.bc
+// RUN: %clang_cc1 -verify=analysis -O2 -triple amdgcn-amd-amdhsa -Rpass-analysis=openmp-opt -fopenmp -x c++ -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86_64-host.bc -o %t.out
+
+// host-no-diagnostics
+
+#define N 256
+
+int main() {
+  double arr[N][N];
+  double b[N];
+  float c[N];
+
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      arr[j][i] = 0.0;
+
+  // These nested loops look parallelisable at a glance, but if they are
+  // collapsed, iterations are no longer data-independent with respect to each
+  // other.  So we emit a remark saying so.
+#pragma omp target map(tofrom: arr)
+#pragma omp parallel for collapse(2)
+  for (int i = 0; i < N; i++) {
+    arr[i][i] = i * 10; // #0
+    // analysis-remark@#0 {{Collapsing imperfectly-nested loop may introduce unexpected data dependencies}}
+    for (int j = 0; j < N; j++) {
+      arr[i][j]++;
+    }
+  }
+
+  // This is fine, the declaration of 'f' can't affect the array 'arr'.
+#pragma omp target map(tofrom: arr)
+#pragma omp parallel for collapse(2)
+  for (int i = 0; i < N; i++) {
+    double f = i * 10;
+    for (int j = 0; j < N; j++) {
+      arr[i][j] += (i == j) ? f : 1;
+    }
+  }
+
+  // The accesses in this loop could be disambiguated, but currently aren't.
+  // So this is a false positive for the remark.
+#pragma omp target map(tofrom: arr, b[0:N])
+#pragma omp parallel for collapse(2)
+  for (int i = 0; i < N; i++) {
+    b[i] = i; // #1
+    // analysis-remark@#1 {{Collapsing imperfectly-nested loop may introduce unexpected data dependencies}}
+    for (int j = 0; j < N; j++) {
+      arr[i][j]++;
+    }
+  }
+
+  // This is fine though, presumably TBAA takes care of it.  No remark emitted.
+#pragma omp target map(tofrom: arr, c[0:N])
+#pragma omp parallel for collapse(2)
+  for (int i = 0; i < N; i++) {
+    c[i] = i;
+    for (int j = 0; j < N; j++) {
+      arr[i][j]++;
+    }
+  }
+
+  return 0;
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index e3a4821b8226b..60acc35623205 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -5721,6 +5721,62 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
 
   FunctionAnalysisManager &FAM =
       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+  // !!! This pass may be invoked at several points in the compilation
+  // pipeline, but we only want to emit these remarks once.  The outermost
+  // condition below is a somewhat crude attempt at ensuring that.
+  // Note that if -save-temps is used, duplicate remarks may be shown.
+  if (isOpenMPDevice(M)) {
+    for (Function &F : M) {
+      if (F.isDeclaration())
+        continue;
+      DominatorTree DT(F);
+      LoopInfo LI(DT);
+      for (const auto &L : LI) {
+        SmallVector<const Instruction *, 4> ImperfectMemInsns;
+        SmallVector<const Instruction *, 4> OtherMemInsns;
+        if (L->getName().starts_with("omp")) {
+          const auto &BBs = L->getBlocksVector();
+          for (const auto &BB : BBs) {
+            for (const auto &I : *BB) {
+              if (I.mayReadOrWriteMemory()) {
+                if (I.hasMetadata("llvm.omp.loop.imperfection")) {
+                  ImperfectMemInsns.push_back(&I);
+                } else {
+                  OtherMemInsns.push_back(&I);
+                }
+              }
+            }
+          }
+        }
+        if (!ImperfectMemInsns.empty()) {
+          AliasAnalysis &AA = FAM.getResult<AAManager>(F);
+          const Instruction *BadInsn = nullptr;
+          for (auto &O : OtherMemInsns) {
+            MemoryLocation OML = MemoryLocation::get(O);
+            for (auto &I : ImperfectMemInsns) {
+              MemoryLocation IML = MemoryLocation::get(I);
+              if (!AA.isNoAlias(OML, IML)) {
+                BadInsn = I;
+                break;
+              }
+            }
+            if (BadInsn)
+              break;
+          }
+          if (BadInsn) {
+            auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+            OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP190", BadInsn);
+            ORE.emit([&ORA]() {
+              return ORA << "Collapsing imperfectly-nested loop may "
+                            "introduce unexpected data dependencies";
+            });
+          }
+        }
+      }
+    }
+  }
+
   KernelSet Kernels = getDeviceKernels(M);
 
   if (PrintModuleBeforeOptimizations)



More information about the cfe-commits mailing list