[clang] 37471cf - [clang][OpenMP] Local variable alignment incorrect with align clause

Tue May 3 13:20:41 PDT 2022

Author: David Pagan
Date: 2022-05-03T13:10:01-07:00
New Revision: 37471cf2c3fd02fac0b0dc6e513cfe6098f37764

URL: https://github.com/llvm/llvm-project/commit/37471cf2c3fd02fac0b0dc6e513cfe6098f37764
DIFF: https://github.com/llvm/llvm-project/commit/37471cf2c3fd02fac0b0dc6e513cfe6098f37764.diff

LOG: [clang][OpenMP] Local variable alignment incorrect with align clause

If the alignment specified with the align clause is less than the natural
alignment of the list item type, the natural alignment should be used instead.

See the OpenMP 5.1 specification, page 185, lines 7-10.

Differential Revision: https://reviews.llvm.org/D124676

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/align_clause_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 391245e3b54bb..aa8aa7d0bd5f8 100644

--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -12153,6 +12153,27 @@ static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
   return AllocVal;
 }
 
+/// Given the allocate directive list item type and the align clause value,
+/// return the alignment to use, or nullptr if \p Alignment is null.
+static llvm::Value *getAlignmentValue(CodeGenFunction &CGF, QualType ListItemTy,
+                                      const Expr *Alignment) {
+  if (!Alignment)
+    return nullptr;
+
+  unsigned UserAlign =
+      Alignment->EvaluateKnownConstInt(CGF.getContext()).getExtValue();
+  CharUnits NaturalAlign = CGF.CGM.getNaturalTypeAlignment(ListItemTy);
+
+  // OpenMP 5.1, page 185, lines 7-10:
+  //   Each item in the align modifier list must be aligned to the maximum
+  //   of the specified alignment and the type's natural alignment.
+  //
+  // Emit that maximum as a SizeTy constant.
+  return llvm::ConstantInt::get(
+      CGF.CGM.SizeTy,
+      std::max<unsigned>(UserAlign, NaturalAlign.getQuantity()));
+}
+
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
   if (!VD)
@@ -12191,11 +12212,8 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
     const Expr *Allocator = AA->getAllocator();
     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
-    llvm::Value *Alignment =
-        AA->getAlignment()
-            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
-                                        CGM.SizeTy, /*isSigned=*/false)
-            : nullptr;
+    llvm::Value *Alignment = getAlignmentValue(
+        CGF, VD->getType().getNonReferenceType(), AA->getAlignment());
     SmallVector<llvm::Value *, 4> Args;
     Args.push_back(ThreadID);
     if (Alignment)

diff  --git a/clang/test/OpenMP/align_clause_codegen.cpp b/clang/test/OpenMP/align_clause_codegen.cpp
index 5ced4652aba10..b8c31d1ca9b66 100644
--- a/clang/test/OpenMP/align_clause_codegen.cpp
+++ b/clang/test/OpenMP/align_clause_codegen.cpp
@@ -85,9 +85,9 @@ int template_test() {
 // CHECK-32-NEXT:  entry:
 // CHECK-32-NEXT:    [[MYALLOC:%.*]] = alloca i32, align 4
 // CHECK-32-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 1, i32 20, i8* null)
+// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 20, i8* null)
 // CHECK-32-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
-// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* inttoptr (i32 7 to i8*))
+// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* inttoptr (i32 7 to i8*))
 // CHECK-32-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
 // CHECK-32-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* inttoptr (i32 6 to i8*))
 // CHECK-32-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
@@ -114,15 +114,15 @@ int template_test() {
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 8 to i8*))
 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV:%.*]] = inttoptr i32 [[TMP3]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 4, i8* [[CONV]])
+// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 4, i8* [[CONV]])
 // CHECK-32-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* [[CONV1]])
+// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* [[CONV1]])
 // CHECK-32-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 80, i8* [[CONV2]])
+// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* [[CONV2]])
 // CHECK-32-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
 // CHECK-32-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
 // CHECK-32-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
@@ -196,9 +196,9 @@ int template_test() {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[MYALLOC:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 1, i64 32, i8* null)
+// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 32, i8* null)
 // CHECK-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
-// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* inttoptr (i64 7 to i8*))
+// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* inttoptr (i64 7 to i8*))
 // CHECK-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
 // CHECK-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* inttoptr (i64 6 to i8*))
 // CHECK-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
@@ -225,15 +225,15 @@ int template_test() {
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 8 to i8*))
 // CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV:%.*]] = inttoptr i64 [[TMP3]] to i8*
-// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 4, i8* [[CONV]])
+// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 4, i8* [[CONV]])
 // CHECK-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
 // CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to i8*
-// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* [[CONV1]])
+// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* [[CONV1]])
 // CHECK-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
 // CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to i8*
-// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 80, i8* [[CONV2]])
+// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* [[CONV2]])
 // CHECK-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
 // CHECK-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, i8* null)
 // CHECK-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**