[llvm-branch-commits] [clang] e67ad2d - Revert "[OpenMP][SIMD][FIX] Use conservative "omp simd ordered" lowering (#12…"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Feb 6 07:02:04 PST 2025
Author: Alexey Bataev
Date: 2025-02-06T10:02:01-05:00
New Revision: e67ad2df6223218b3b1e9e0b067c1df6fcc432fd
URL: https://github.com/llvm/llvm-project/commit/e67ad2df6223218b3b1e9e0b067c1df6fcc432fd
DIFF: https://github.com/llvm/llvm-project/commit/e67ad2df6223218b3b1e9e0b067c1df6fcc432fd.diff
LOG: Revert "[OpenMP][SIMD][FIX] Use conservative "omp simd ordered" lowering (#12…"
This reverts commit 60d8e6f5286002dc8fe836aa6229a83cd84ff51c.
Added:
Modified:
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/ordered_codegen.cpp
Removed:
clang/test/OpenMP/simd_conservative_ordered.c
################################################################################
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 3542e939678cf4..8e694b95dc7e73 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2457,86 +2457,10 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
}
}
-// Check for the presence of an `OMPOrderedDirective`,
-// i.e., `ordered` in `#pragma omp ordered simd`.
-//
-// Consider the following source code:
-// ```
-// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
-// {
-// for (int r = 1; r < ARRAY_SIZE; ++r) {
-// for (int c = 1; c < ARRAY_SIZE; ++c) {
-// #pragma omp simd
-// for (int k = 2; k < ARRAY_SIZE; ++k) {
-// #pragma omp ordered simd
-// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
-// }
-// }
-// }
-// }
-// ```
-//
-// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
-// &D)`. By examining `D.dump()` we have the following AST containing
-// `OMPOrderedDirective`:
-//
-// ```
-// OMPSimdDirective 0x1c32950
-// `-CapturedStmt 0x1c32028
-// |-CapturedDecl 0x1c310e8
-// | |-ForStmt 0x1c31e30
-// | | |-DeclStmt 0x1c31298
-// | | | `-VarDecl 0x1c31208 used k 'int' cinit
-// | | | `-IntegerLiteral 0x1c31278 'int' 2
-// | | |-<<<NULL>>>
-// | | |-BinaryOperator 0x1c31308 'int' '<'
-// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
-// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
-// | | | `-IntegerLiteral 0x1c312d0 'int' 256
-// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
-// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
-// | | `-CompoundStmt 0x1c31e18
-// | | `-OMPOrderedDirective 0x1c31dd8
-// | | |-OMPSimdClause 0x1c31380
-// | | `-CapturedStmt 0x1c31cd0
-// ```
-//
-// Note the presence of `OMPOrderedDirective` above:
-// It's (transitively) nested in a `CapturedStmt` representing the pragma
-// annotated compound statement. Thus, we need to consider this nesting and
-// include checking the `getCapturedStmt` in this case.
-static bool hasOrderedDirective(const Stmt *S) {
- if (isa<OMPOrderedDirective>(S))
- return true;
-
- if (const auto *CS = dyn_cast<CapturedStmt>(S))
- return hasOrderedDirective(CS->getCapturedStmt());
-
- for (const Stmt *Child : S->children()) {
- if (Child && hasOrderedDirective(Child))
- return true;
- }
-
- return false;
-}
-
-static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
- LoopInfoStack &LoopStack) {
- // Check for the presence of an `OMPOrderedDirective`
- // i.e., `ordered` in `#pragma omp ordered simd`
- bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
- // If present then conservatively disable loop vectorization
- // analogously to how `emitSimdlenSafelenClause` does.
- if (HasOrderedDirective)
- LoopStack.setParallel(/*Enable=*/false);
-}
-
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
// Walk clauses and process safelen/lastprivate.
LoopStack.setParallel(/*Enable=*/true);
LoopStack.setVectorizeEnable();
- const Stmt *AssociatedStmt = D.getAssociatedStmt();
- applyConservativeSimdOrderedDirective(*AssociatedStmt, LoopStack);
emitSimdlenSafelenClause(*this, D);
if (const auto *C = D.getSingleClause<OMPOrderClause>())
if (C->getKind() == OMPC_ORDER_concurrent)
diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp
index 5cd95f1927e5ce..67285cfaef34d5 100644
--- a/clang/test/OpenMP/ordered_codegen.cpp
+++ b/clang/test/OpenMP/ordered_codegen.cpp
@@ -572,30 +572,30 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1
// CHECK1-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1
// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -645,31 +645,31 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV16]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
// CHECK1: omp.inner.for.cond29:
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
// CHECK1: omp.inner.for.body32:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
// CHECK1-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]]
-// CHECK1-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
// CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM35]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4
-// CHECK1-NEXT: call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
// CHECK1: omp.body.continue37:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
// CHECK1: omp.inner.for.inc38:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
-// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK1: omp.inner.for.end40:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
@@ -1201,32 +1201,32 @@ void foo_simd(int low, int up) {
// CHECK1-IRBUILDER-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.cond:
-// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1
// CHECK1-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
// CHECK1-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body:
-// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1
// CHECK1-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]]
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body.ordered.after:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1-IRBUILDER: omp.body.continue:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.inc:
-// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK1-IRBUILDER: omp.inner.for.end:
// CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -1278,34 +1278,34 @@ void foo_simd(int low, int up) {
// CHECK1-IRBUILDER-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV16]], align 4
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.cond30:
-// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1
// CHECK1-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]]
// CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body33:
-// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1
// CHECK1-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]]
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64
// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM36]]
-// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4
-// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body33.ordered.after:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]]
// CHECK1-IRBUILDER: omp.body.continue38:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.inc39:
-// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]])
-// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]])
-// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK1-IRBUILDER: omp.inner.for.end42:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1-IRBUILDER: omp.dispatch.inc:
@@ -1812,30 +1812,30 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1
// CHECK3-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1
// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -1885,31 +1885,31 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV16]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
// CHECK3: omp.inner.for.cond29:
-// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK3-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK3-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
// CHECK3: omp.inner.for.body32:
-// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
// CHECK3-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]]
-// CHECK3-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4
-// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK3-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
// CHECK3-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM35]]
-// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4
-// CHECK3-NEXT: call void @__captured_stmt.1(ptr [[I28]])
+// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK3-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
// CHECK3: omp.body.continue37:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
// CHECK3: omp.inner.for.inc38:
-// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
-// CHECK3-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4
-// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK3-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK3: omp.inner.for.end40:
// CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK3: omp.dispatch.inc:
@@ -2441,32 +2441,32 @@ void foo_simd(int low, int up) {
// CHECK3-IRBUILDER-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3-IRBUILDER: omp.inner.for.cond:
-// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1
// CHECK3-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
// CHECK3-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3-IRBUILDER: omp.inner.for.body:
-// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1
// CHECK3-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]]
-// CHECK3-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK3-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
// CHECK3-IRBUILDER: omp.inner.for.body.ordered.after:
// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3-IRBUILDER: omp.body.continue:
// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-IRBUILDER: omp.inner.for.inc:
-// CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1
-// CHECK3-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK3-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK3-IRBUILDER: omp.inner.for.end:
// CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -2518,34 +2518,34 @@ void foo_simd(int low, int up) {
// CHECK3-IRBUILDER-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV16]], align 4
// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]]
// CHECK3-IRBUILDER: omp.inner.for.cond30:
-// CHECK3-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK3-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK3-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1
// CHECK3-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]]
// CHECK3-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]]
// CHECK3-IRBUILDER: omp.inner.for.body33:
-// CHECK3-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK3-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK3-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1
// CHECK3-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]]
-// CHECK3-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4
-// CHECK3-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK3-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK3-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64
// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM36]]
-// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4
-// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]])
+// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]]
// CHECK3-IRBUILDER: omp.inner.for.body33.ordered.after:
// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]]
// CHECK3-IRBUILDER: omp.body.continue38:
// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]]
// CHECK3-IRBUILDER: omp.inner.for.inc39:
-// CHECK3-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1
-// CHECK3-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4
+// CHECK3-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]])
-// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]])
-// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK3-IRBUILDER: omp.inner.for.end42:
// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK3-IRBUILDER: omp.dispatch.inc:
@@ -2885,33 +2885,33 @@ void foo_simd(int low, int up) {
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]]
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK5-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1
// CHECK5-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1
// CHECK5-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK5-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK5-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK5-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP12]] to i64
// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM9]]
-// CHECK5-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX10]], align 4
+// CHECK5-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP13]], 1
-// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -2951,33 +2951,33 @@ void foo_simd(int low, int up) {
// CHECK5-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV30]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND32:%.*]]
// CHECK5: omp.inner.for.cond32:
-// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4
-// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]]
+// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]]
// CHECK5-NEXT: [[ADD33:%.*]] = add i32 [[TMP27]], 1
// CHECK5-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP26]], [[ADD33]]
// CHECK5-NEXT: br i1 [[CMP34]], label [[OMP_INNER_FOR_BODY35:%.*]], label [[OMP_INNER_FOR_END45:%.*]]
// CHECK5: omp.inner.for.body35:
-// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4
+// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4, !llvm.access.group [[ACC_GRP13]]
+// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4, !llvm.access.group [[ACC_GRP13]]
// CHECK5-NEXT: [[MUL36:%.*]] = mul i32 [[TMP29]], 1
// CHECK5-NEXT: [[ADD37:%.*]] = add i32 [[TMP28]], [[MUL36]]
-// CHECK5-NEXT: store i32 [[ADD37]], ptr [[I31]], align 4
-// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[I31]], align 4
+// CHECK5-NEXT: store i32 [[ADD37]], ptr [[I31]], align 4, !llvm.access.group [[ACC_GRP13]]
+// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[I31]], align 4, !llvm.access.group [[ACC_GRP13]]
// CHECK5-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP30]] to i64
// CHECK5-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM38]]
-// CHECK5-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX39]], align 4
-// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[I31]], align 4
+// CHECK5-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX39]], align 4, !llvm.access.group [[ACC_GRP13]]
+// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[I31]], align 4, !llvm.access.group [[ACC_GRP13]]
// CHECK5-NEXT: [[IDXPROM40:%.*]] = sext i32 [[TMP31]] to i64
// CHECK5-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM40]]
-// CHECK5-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX41]], align 4
+// CHECK5-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX41]], align 4, !llvm.access.group [[ACC_GRP13]]
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE42:%.*]]
// CHECK5: omp.body.continue42:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC43:%.*]]
// CHECK5: omp.inner.for.inc43:
-// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4
+// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4, !llvm.access.group [[ACC_GRP13]]
// CHECK5-NEXT: [[ADD44:%.*]] = add i32 [[TMP32]], 1
-// CHECK5-NEXT: store i32 [[ADD44]], ptr [[DOTOMP_IV30]], align 4
-// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND32]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK5-NEXT: store i32 [[ADD44]], ptr [[DOTOMP_IV30]], align 4, !llvm.access.group [[ACC_GRP13]]
+// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND32]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK5: omp.inner.for.end45:
// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_20]], align 4
diff --git a/clang/test/OpenMP/simd_conservative_ordered.c b/clang/test/OpenMP/simd_conservative_ordered.c
deleted file mode 100644
index 15fb61c7263a37..00000000000000
--- a/clang/test/OpenMP/simd_conservative_ordered.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "omp_simd_loop" --check-globals smart --filter "access|\%omp.inner.for.cond.*\!llvm.loop"
-//
-// RUN: %clang -g0 -fopenmp-simd -x c -S -emit-llvm %s -o - | FileCheck %s
-#include <float.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-int compare_float(float x1, float x2, float scalar) {
- const float
diff = fabsf(x1 - x2);
- x1 = fabsf(x1);
- x2 = fabsf(x2);
- const float l = (x2 > x1) ? x2 : x1;
- if (
diff <= l * scalar * FLT_EPSILON)
- return 1;
- else
- return 0;
-}
-
-#define ARRAY_SIZE 256
-
-__attribute__((noinline)) void initialization_loop(
- float X[ARRAY_SIZE][ARRAY_SIZE], float Y[ARRAY_SIZE][ARRAY_SIZE]) {
- const float max = 1000.0;
- srand(time(NULL));
- for (int r = 0; r < ARRAY_SIZE; r++) {
- for (int c = 0; c < ARRAY_SIZE; c++) {
- X[r][c] = ((float)rand() / (float)(RAND_MAX)) * max;
- Y[r][c] = X[r][c];
- }
- }
-}
-
-// CHECK-LABEL: @omp_simd_loop(
-// do not emit llvm.access.group metadata due to usage of ordered clause.
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}}
-// CHECK-NOT: store i32 {{.+}}, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load ptr, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load float, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: call float @sinf(float noundef {{.+}}) #[[ATTR:[0-9]+]], llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load ptr, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: store float {{.+}}, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: load i32, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-// CHECK-NOT: store i32 {{.+}}, ptr {{.+}}, align {{.+}}, llvm.access.group {{![0-9]+}}
-//
-// CHECK: br label [[OMP_INNER_FOR_COND:%.*]], !llvm.loop [[LOOP9:![0-9]+]]
-//
-__attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE]) {
- for (int r = 1; r < ARRAY_SIZE; ++r) {
- for (int c = 1; c < ARRAY_SIZE; ++c) {
-#pragma omp simd
- for (int k = 2; k < ARRAY_SIZE; ++k) {
-#pragma omp ordered simd
- X[r][k] = X[r][k - 2] + sinf((float)(r / c));
- }
- }
- }
-}
-
-__attribute__((noinline)) int comparison_loop(float X[ARRAY_SIZE][ARRAY_SIZE],
- float Y[ARRAY_SIZE][ARRAY_SIZE]) {
- int totalErrors_simd = 0;
- const float scalar = 1.0;
- for (int r = 1; r < ARRAY_SIZE; ++r) {
- for (int c = 1; c < ARRAY_SIZE; ++c) {
- for (int k = 2; k < ARRAY_SIZE; ++k) {
- Y[r][k] = Y[r][k - 2] + sinf((float)(r / c));
- }
- }
- // check row for simd update
- for (int k = 0; k < ARRAY_SIZE; ++k) {
- if (!compare_float(X[r][k], Y[r][k], scalar)) {
- ++totalErrors_simd;
- }
- }
- }
- return totalErrors_simd;
-}
-
-int main(void) {
- float X[ARRAY_SIZE][ARRAY_SIZE];
- float Y[ARRAY_SIZE][ARRAY_SIZE];
-
- initialization_loop(X, Y);
- omp_simd_loop(X);
- const int totalErrors_simd = comparison_loop(X, Y);
-
- if (totalErrors_simd) {
- fprintf(stdout, "totalErrors_simd: %d \n", totalErrors_simd);
- fprintf(stdout, "%s : %d - FAIL: error in ordered simd computation.\n",
- __FILE__, __LINE__);
- } else {
- fprintf(stdout, "Success!\n");
- }
-
- return totalErrors_simd;
-}
-//.
-// CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]}
-// CHECK-NOT: !{!"llvm.loop.parallel_accesses"
-// CHECK: [[META10]] = !{!"llvm.loop.vectorize.enable", i1 true}
-//.
More information about the llvm-branch-commits
mailing list