[llvm-branch-commits] [clang] a9ac5ac - [Clang][OpenMP] Fix the issue that `llvm.lifetime.end` is emitted too early for variables captured in linear clause

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Aug 15 00:32:35 PDT 2022


Author: Shilei Tian
Date: 2022-08-15T09:31:02+02:00
New Revision: a9ac5ac72816978bcf1a9d11def8e57dee25d330

URL: https://github.com/llvm/llvm-project/commit/a9ac5ac72816978bcf1a9d11def8e57dee25d330
DIFF: https://github.com/llvm/llvm-project/commit/a9ac5ac72816978bcf1a9d11def8e57dee25d330.diff

LOG: [Clang][OpenMP] Fix the issue that `llvm.lifetime.end` is emitted too early for variables captured in linear clause

Currently if an OpenMP program uses `linear` clause, and is compiled with
optimization, `llvm.lifetime.end` for variables listed in `linear` clause are
emitted too early such that there could still be uses after that. Let's take the
following code as example:
```
// loop.c
int j;
int *u;

void loop(int n) {
  int i;
  for (i = 0; i < n; ++i) {
    ++j;
    u = &j;
  }
}
```
We compile using the command:
```
clang -cc1 -fopenmp-simd -O3 -x c -triple x86_64-apple-darwin10 -emit-llvm loop.c -o loop.ll
```
The following IR (simplified) will be generated:
```
@j = local_unnamed_addr global i32 0, align 4
@u = local_unnamed_addr global ptr null, align 8

define void @loop(i32 noundef %n) local_unnamed_addr {
entry:
  %j = alloca i32, align 4
  %cmp = icmp sgt i32 %n, 0
  br i1 %cmp, label %simd.if.then, label %simd.if.end

simd.if.then:                                     ; preds = %entry
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %j)
  store ptr %j, ptr @u, align 8
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %j)
  %0 = load i32, ptr %j, align 4
  store i32 %0, ptr @j, align 4
  br label %simd.if.end

simd.if.end:                                      ; preds = %simd.if.then, %entry
  ret void
}
```
The most important part is:
```
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %j)
  %0 = load i32, ptr %j, align 4
  store i32 %0, ptr @j, align 4
```
`%j` is still loaded after `@llvm.lifetime.end.p0(i64 4, ptr nonnull %j)`. This
could cause the backend incorrectly optimizes the code and further generates
incorrect code. The root cause is, when we emit a construct that could have
`linear` clause, it usually has the following pattern:
```
EmitOMPLinearClauseInit(S)
{
  OMPPrivateScope LoopScope(*this);
  ...
  EmitOMPLinearClause(S, LoopScope);
  ...
  (void)LoopScope.Privatize();
  ...
}
EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
```
Variables that need to be privatized are added into `LoopScope`, which also
serves as a RAII object. When `LoopScope` is destructed and if optimization is
enabled, a `@llvm.lifetime.end` is also emitted for each privatized variable.
However, the writing back to original variables in `linear` clause happens after
the scope in `EmitOMPLinearClauseFinal`, causing the issue we see above.

A quick "fix" seems to be, moving `EmitOMPLinearClauseFinal` inside the scope.
However, it doesn't work. That's because the local variable map has been updated
by `LoopScope` such that a variable declaration is mapped to the privatized
variable, instead of the actual one. In that way, the following code will be
generated:
```
  %0 = load i32, ptr %j, align 4
  store i32 %0, ptr %j, align 4
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %j)
```
Well, now the life time is correct, but apparently the writing back is broken.

In this patch, a new function `OMPPrivateScope::restoreMap` is added and called
before calling `EmitOMPLinearClauseFinal`. This can make sure that
`EmitOMPLinearClauseFinal` can find the orignal varaibls to write back.

Fixes #56913.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D131272

(cherry picked from commit e21202dac18ed7f718d26a0e131f96b399b4891c)

Added: 
    clang/test/OpenMP/bug56913.c

Modified: 
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/test/OpenMP/for_linear_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index aa55cdaca5dca..570424dae7fc8 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2582,8 +2582,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
     emitPostUpdateForReductionClause(CGF, S,
                                      [](CodeGenFunction &) { return nullptr; });
+    LoopScope.restoreMap();
+    CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
   }
-  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
   // Emit: if (PreCond) - end.
   if (ContBlock) {
     CGF.EmitBranch(ContBlock);
@@ -3426,11 +3427,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
         EmitOMPLastprivateClauseFinal(
             S, isOpenMPSimdDirective(S.getDirectiveKind()),
             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
+      LoopScope.restoreMap();
+      EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
+        return CGF.Builder.CreateIsNotNull(
+            CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
+      });
     }
-    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
-      return CGF.Builder.CreateIsNotNull(
-          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
-    });
     DoacrossCleanupScope.ForceCleanup();
     // We're now done with the loop, so jump to the continuation block.
     if (ContBlock) {
@@ -7658,6 +7660,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
               (*LIP)->getType(), S.getBeginLoc())));
     }
+    LoopScope.restoreMap();
     CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
       return CGF.Builder.CreateIsNotNull(
           CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index fe0890f433e8c..672acd844525b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1094,7 +1094,7 @@ class CodeGenFunction : public CodeGenTypeCache {
 
     void ForceCleanup() {
       RunCleanupsScope::ForceCleanup();
-      MappedVars.restore(CGF);
+      restoreMap();
     }
 
     /// Exit scope - all the mapped variables are restored.
@@ -1108,6 +1108,11 @@ class CodeGenFunction : public CodeGenTypeCache {
       VD = VD->getCanonicalDecl();
       return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
     }
+
+    /// Restore all mapped variables w/o clean up. This is usefully when we want
+    /// to reference the original variables but don't want the clean up because
+    /// that could emit lifetime end too early, causing backend issue #56913.
+    void restoreMap() { MappedVars.restore(CGF); }
   };
 
   /// Save/restore original map of previously emitted local vars in case when we

diff  --git a/clang/test/OpenMP/bug56913.c b/clang/test/OpenMP/bug56913.c
new file mode 100644
index 0000000000000..cc7231626878c
--- /dev/null
+++ b/clang/test/OpenMP/bug56913.c
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -fopenmp-simd -O1 -x c -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
+
+int j;
+int *u;
+
+void loop(int n) {
+  int i;
+#pragma omp parallel master taskloop simd linear(j)
+  for (i = 0; i < n; ++i) {
+    ++j;
+    u = &j;
+  }
+}
+// CHECK-LABEL: define {{[^@]+}}@loop
+// CHECK-SAME: (i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
+// CHECK:       simd.if.then:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[J]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP8:![0-9]+]]
+// CHECK-NEXT:    [[INC_LE:%.*]] = add i32 [[TMP0]], [[N]]
+// CHECK-NEXT:    store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT:    store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[J]]) #[[ATTR2]]
+// CHECK-NEXT:    br label [[SIMD_IF_END]]
+// CHECK:       simd.if.end:
+// CHECK-NEXT:    ret void
+//

diff  --git a/clang/test/OpenMP/for_linear_codegen.cpp b/clang/test/OpenMP/for_linear_codegen.cpp
index 05ccf94e4fd43..c7c9324dc1b66 100644
--- a/clang/test/OpenMP/for_linear_codegen.cpp
+++ b/clang/test/OpenMP/for_linear_codegen.cpp
@@ -322,18 +322,18 @@ int main() {
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]])
-// CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i64* [[DOTLVAR__ADDR]] to i8*
-// CHECK1-NEXT:    call void @__kmpc_free(i32 [[TMP5]], i8* [[TMP19]], i8* inttoptr (i64 5 to i8*))
-// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
-// CHECK1-NEXT:    br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
+// CHECK1-NEXT:    br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
 // CHECK1:       .omp.linear.pu:
-// CHECK1-NEXT:    [[TMP22:%.*]] = load float*, float** [[PVAR2]], align 8
-// CHECK1-NEXT:    store float* [[TMP22]], float** [[TMP0]], align 8
-// CHECK1-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTLVAR__ADDR]], align 8
-// CHECK1-NEXT:    store i64 [[TMP23]], i64* [[TMP1]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = load float*, float** [[PVAR2]], align 8
+// CHECK1-NEXT:    store float* [[TMP21]], float** [[TMP0]], align 8
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTLVAR__ADDR]], align 8
+// CHECK1-NEXT:    store i64 [[TMP22]], i64* [[TMP1]], align 8
 // CHECK1-NEXT:    br label [[DOTOMP_LINEAR_PU_DONE]]
 // CHECK1:       .omp.linear.pu.done:
+// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i64* [[DOTLVAR__ADDR]] to i8*
+// CHECK1-NEXT:    call void @__kmpc_free(i32 [[TMP5]], i8* [[TMP23]], i8* inttoptr (i64 5 to i8*))
 // CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]])
 // CHECK1-NEXT:    ret void
 //

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
index ef015914d6710..ffda7d2ad8019 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
@@ -1847,8 +1847,12 @@ void loop() {
 // CHECK6-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
 // CHECK6-NEXT:    br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
 // CHECK6:       .omp.linear.pu.i:
-// CHECK6-NEXT:    [[TMP39:%.*]] = load i32, i32* [[J_I]], align 4, !noalias !14
-// CHECK6-NEXT:    store i32 [[TMP39]], i32* [[J_I]], align 4, !noalias !14
+// CHECK6-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP40:%.*]] = load i32*, i32** [[TMP39]], align 8
+// CHECK6-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 1
+// CHECK6-NEXT:    [[TMP42:%.*]] = load i32*, i32** [[TMP41]], align 8
+// CHECK6-NEXT:    [[TMP43:%.*]] = load i32, i32* [[J_I]], align 4, !noalias !14
+// CHECK6-NEXT:    store i32 [[TMP43]], i32* [[TMP42]], align 4
 // CHECK6-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT]]
 // CHECK6:       .omp_outlined..1.exit:
 // CHECK6-NEXT:    ret i32 0


        


More information about the llvm-branch-commits mailing list