[clang] [Clang] Use private address space for builtin_alloca return type for OpenCL (PR #95750)

Vikash Gupta via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 26 00:16:08 PDT 2024


https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/95750

>From ac967e4f887bd511bdfcaf30a0c94aa083cbf980 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 17 Jun 2024 11:20:02 +0530
Subject: [PATCH 1/9] [Clang] Use private address space for builtin_alloca for
 OpenCL

The __builtin_alloca was returning a flat pointer with no address
space when compiled using OpenCL 1.2 or below, but worked fine with
OpenCL 2.0 and above. This is because the latter uses the concept of
a generic address space, which supports casts to other address spaces
(i.e. to the private address space, which is used for stack
allocation).

So, in general for OpenCL, __builtin_alloca should always return a
pointer to the private address space, thus eliminating the need for
an address space cast. This requires redefining the builtin function
with a return type that is a pointer to the private address space.
---
 clang/lib/Sema/SemaExpr.cpp                 | 23 +++++-
 clang/test/CodeGenOpenCL/builtins-alloca.cl | 86 +++++++++++++++++++++
 clang/test/CodeGenOpenCL/memcpy.cl          |  0
 3 files changed, 106 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-alloca.cl
 mode change 100644 => 100755 clang/test/CodeGenOpenCL/memcpy.cl

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 8d24e34520e77..cf4c98fbe2c38 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6121,7 +6121,10 @@ bool Sema::CheckArgsForPlaceholders(MultiExprArg args) {
 ///                  it does not contain any pointer arguments without
 ///                  an address space qualifer.  Otherwise the rewritten
 ///                  FunctionDecl is returned.
-/// TODO: Handle pointer return types.
+///
+/// Pointer return type with no explicit address space is assigned the
+/// default address space where pointer points to based on the language
+/// option used to compile it.
 static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
                                                 FunctionDecl *FDecl,
                                                 MultiExprArg ArgExprs) {
@@ -6165,13 +6168,27 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
     OverloadParams.push_back(Context.getPointerType(PointeeType));
   }
 
+  QualType ReturnTy = FT->getReturnType();
+  QualType OverloadReturnTy = ReturnTy;
+  if (ReturnTy->isPointerType() &&
+      !ReturnTy->getPointeeType().hasAddressSpace()) {
+    if (Sema->getLangOpts().OpenCL) {
+      NeedsNewDecl = true;
+
+      QualType ReturnPtTy = ReturnTy->getPointeeType();
+      LangAS defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
+      ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS);
+      OverloadReturnTy = Context.getPointerType(ReturnPtTy);
+    }
+  }
+
   if (!NeedsNewDecl)
     return nullptr;
 
   FunctionProtoType::ExtProtoInfo EPI;
   EPI.Variadic = FT->isVariadic();
-  QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
-                                                OverloadParams, EPI);
+  QualType OverloadTy =
+      Context.getFunctionType(OverloadReturnTy, OverloadParams, EPI);
   DeclContext *Parent = FDecl->getParent();
   FunctionDecl *OverloadDecl = FunctionDecl::Create(
       Context, Parent, FDecl->getLocation(), FDecl->getLocation(),
diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl
new file mode 100644
index 0000000000000..74a86955f2e4f
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
+
+// OPENCL12-LABEL: define dso_local ptr addrspace(5) @test1(
+// OPENCL12-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    ret ptr addrspace(5) [[TMP1]]
+//
+// OPENCL20-LABEL: define dso_local ptr @test1(
+// OPENCL20-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL20-NEXT:  [[ENTRY:.*:]]
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL20-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL20-NEXT:    ret ptr [[TMP2]]
+//
+// OPENCL30-LABEL: define dso_local ptr addrspace(5) @test1(
+// OPENCL30-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    ret ptr addrspace(5) [[TMP1]]
+//
+// OPENCL30-EXT-LABEL: define dso_local ptr @test1(
+// OPENCL30-EXT-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    ret ptr [[TMP2]]
+//
+float* test1() {
+    float* alloc_ptr = (float*)__builtin_alloca(32 * sizeof(int));
+    return alloc_ptr;
+}
+
+// OPENCL12-LABEL: define dso_local void @test2(
+// OPENCL12-SAME: ) #[[ATTR0]] {
+// OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    ret void
+//
+// OPENCL20-LABEL: define dso_local void @test2(
+// OPENCL20-SAME: ) #[[ATTR0]] {
+// OPENCL20-NEXT:  [[ENTRY:.*:]]
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL20-NEXT:    ret void
+//
+// OPENCL30-LABEL: define dso_local void @test2(
+// OPENCL30-SAME: ) #[[ATTR0]] {
+// OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    ret void
+//
+// OPENCL30-EXT-LABEL: define dso_local void @test2(
+// OPENCL30-EXT-SAME: ) #[[ATTR0]] {
+// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    ret void
+//
+void test2() {
+    void *alloc_ptr = __builtin_alloca(28);
+}
diff --git a/clang/test/CodeGenOpenCL/memcpy.cl b/clang/test/CodeGenOpenCL/memcpy.cl
old mode 100644
new mode 100755

>From 8a5efde644bd7d6b96f26b33ffd8bc6352c428d5 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 17 Jun 2024 16:23:03 +0530
Subject: [PATCH 2/9] updated return pointer to always point to stack/private
 address space for builtin alloca variants.

---
 clang/lib/Sema/SemaExpr.cpp                 |  21 +-
 clang/test/CodeGenOpenCL/builtins-alloca.cl | 275 ++++++++++++++++----
 clang/test/CodeGenOpenCL/memcpy.cl          |   0
 3 files changed, 242 insertions(+), 54 deletions(-)
 mode change 100755 => 100644 clang/test/CodeGenOpenCL/memcpy.cl

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index cf4c98fbe2c38..aa976e75db66f 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6176,7 +6176,26 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
       NeedsNewDecl = true;
 
       QualType ReturnPtTy = ReturnTy->getPointeeType();
-      LangAS defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
+      unsigned BuiltinID = FDecl->getBuiltinID();
+      LangAS defClAS;
+
+      // __builtin_alloca* should always return pointer to stack/private
+      // Address Space, while for other builtins with return pointer type,
+      // it should depend on the OpenCL version.
+      switch (BuiltinID) {
+      case Builtin::BI__builtin_alloca_uninitialized:
+      case Builtin::BI__builtin_alloca:
+      case Builtin::BI__builtin_alloca_with_align_uninitialized:
+      case Builtin::BI__builtin_alloca_with_align: {
+        defClAS = LangAS::opencl_private;
+        break;
+      }
+      default: {
+        defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
+        break;
+      }
+      }
+
       ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS);
       OverloadReturnTy = Context.getPointerType(ReturnPtTy);
     }
diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl
index 74a86955f2e4f..2df6bf3dba6a3 100644
--- a/clang/test/CodeGenOpenCL/builtins-alloca.cl
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -1,86 +1,255 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
+// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \
+// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \
+// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \
+// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
 
-// OPENCL12-LABEL: define dso_local ptr addrspace(5) @test1(
-// OPENCL12-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL12-LABEL: define dso_local void @test1(
+// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 // OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL12-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL12-NEXT:    ret ptr addrspace(5) [[TMP1]]
+// OPENCL12-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL12-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL12-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL12-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
+// OPENCL12-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL12-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL12-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
+// OPENCL12-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL12-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL12-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
+// OPENCL12-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL12-NEXT:    ret void
 //
-// OPENCL20-LABEL: define dso_local ptr @test1(
-// OPENCL20-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL20-LABEL: define dso_local void @test1(
+// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 // OPENCL20-NEXT:  [[ENTRY:.*:]]
-// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
-// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
-// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
-// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
-// OPENCL20-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
-// OPENCL20-NEXT:    ret ptr [[TMP2]]
+// OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL20-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL20-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL20-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
+// OPENCL20-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL20-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL20-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
+// OPENCL20-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL20-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL20-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
+// OPENCL20-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL20-NEXT:    ret void
 //
-// OPENCL30-LABEL: define dso_local ptr addrspace(5) @test1(
-// OPENCL30-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-LABEL: define dso_local void @test1(
+// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 // OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-NEXT:    ret ptr addrspace(5) [[TMP1]]
+// OPENCL30-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL30-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL30-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL30-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
+// OPENCL30-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL30-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL30-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
+// OPENCL30-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL30-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL30-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
+// OPENCL30-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL30-NEXT:    ret void
 //
-// OPENCL30-EXT-LABEL: define dso_local ptr @test1(
-// OPENCL30-EXT-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-EXT-LABEL: define dso_local void @test1(
+// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 // OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
-// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
-// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
-// OPENCL30-EXT-NEXT:    ret ptr [[TMP2]]
+// OPENCL30-EXT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL30-EXT-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL30-EXT-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
+// OPENCL30-EXT-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL30-EXT-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
+// OPENCL30-EXT-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL30-EXT-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
+// OPENCL30-EXT-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL30-EXT-NEXT:    ret void
 //
-float* test1() {
-    float* alloc_ptr = (float*)__builtin_alloca(32 * sizeof(int));
-    return alloc_ptr;
+void test1(unsigned n) {
+    __private float* alloc_ptr = (__private float*)__builtin_alloca(n*sizeof(int));
+    __private float* alloc_ptr_uninitialized = (__private float*)__builtin_alloca_uninitialized(n*sizeof(int));
+    __private float* alloc_ptr_align = (__private float*)__builtin_alloca_with_align((n*sizeof(int)), 8);
+    __private float* alloc_ptr_align_uninitialized = (__private float*)__builtin_alloca_with_align_uninitialized((n*sizeof(int)), 8);
 }
 
 // OPENCL12-LABEL: define dso_local void @test2(
-// OPENCL12-SAME: ) #[[ATTR0]] {
+// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
 // OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL12-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL12-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL12-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL12-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL12-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL12-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL12-NEXT:    ret void
 //
 // OPENCL20-LABEL: define dso_local void @test2(
-// OPENCL20-SAME: ) #[[ATTR0]] {
+// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
 // OPENCL20-NEXT:  [[ENTRY:.*:]]
-// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
-// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
-// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
-// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL20-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL20-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL20-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL20-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL20-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL20-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void @test2(
-// OPENCL30-SAME: ) #[[ATTR0]] {
+// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
 // OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL30-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL30-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL30-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL30-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL30-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL30-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL30-NEXT:    ret void
 //
 // OPENCL30-EXT-LABEL: define dso_local void @test2(
-// OPENCL30-EXT-SAME: ) #[[ATTR0]] {
+// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
 // OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
-// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL30-EXT-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL30-EXT-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL30-EXT-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL30-EXT-NEXT:    ret void
 //
-void test2() {
-    void *alloc_ptr = __builtin_alloca(28);
+void test2(unsigned n) {
+    __private void *alloc_ptr = __builtin_alloca(n);
+    __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n);
+    __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);;
+    __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8);
 }
diff --git a/clang/test/CodeGenOpenCL/memcpy.cl b/clang/test/CodeGenOpenCL/memcpy.cl
old mode 100755
new mode 100644

>From 8a4a143f1c5bf0e76f46e186b8dac560f3f10dc7 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Wed, 3 Jul 2024 12:36:33 +0530
Subject: [PATCH 3/9] Moved the patch to dedicated builtin function in
 SemaChecking.cpp & refactored LIT test.

---
 clang/lib/Sema/SemaChecking.cpp             |  29 +-
 clang/lib/Sema/SemaExpr.cpp                 |  42 +--
 clang/test/CodeGenOpenCL/builtins-alloca.cl | 363 +++++++-------------
 3 files changed, 157 insertions(+), 277 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 45b9bbb23dbf7..0a354c9bc42f7 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1477,6 +1477,31 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
   return false;
 }
 
+// In OpenCL, __builtin_alloca_* should return a pointer to address space
+// that corresponds to the stack address space i.e private address space.
+static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
+  S.Diag(TheCall->getBeginLoc(), diag::warn_alloca)
+      << TheCall->getDirectCallee();
+
+  QualType RT = TheCall->getType();
+  if (!RT->isPointerType() || RT->getPointeeType().hasAddressSpace())
+    return true;
+
+  if (S.getLangOpts().OpenCL) {
+    RT = RT->getPointeeType();
+
+    // __builtin_alloca* should always return pointer to stack/private
+    // Address Space, while for other builtins with return pointer type,
+    // it should depend on the OpenCL version.
+    LangAS openCLStackAS = LangAS::opencl_private;
+
+    RT = S.Context.getAddrSpaceQualType(RT, openCLStackAS);
+    TheCall->setType(S.Context.getPointerType(RT));
+  }
+
+  return false;
+}
+
 namespace {
 enum PointerAuthOpKind {
   PAO_Strip,
@@ -2208,8 +2233,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     [[fallthrough]];
   case Builtin::BI__builtin_alloca:
   case Builtin::BI__builtin_alloca_uninitialized:
-    Diag(TheCall->getBeginLoc(), diag::warn_alloca)
-        << TheCall->getDirectCallee();
+    if (OpenCLBuiltinAllocaAddrSpace(*this, TheCall))
+      return ExprError();
     break;
   case Builtin::BI__arithmetic_fence:
     if (BuiltinArithmeticFence(TheCall))
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index aa976e75db66f..8d24e34520e77 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6121,10 +6121,7 @@ bool Sema::CheckArgsForPlaceholders(MultiExprArg args) {
 ///                  it does not contain any pointer arguments without
 ///                  an address space qualifer.  Otherwise the rewritten
 ///                  FunctionDecl is returned.
-///
-/// Pointer return type with no explicit address space is assigned the
-/// default address space where pointer points to based on the language
-/// option used to compile it.
+/// TODO: Handle pointer return types.
 static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
                                                 FunctionDecl *FDecl,
                                                 MultiExprArg ArgExprs) {
@@ -6168,46 +6165,13 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
     OverloadParams.push_back(Context.getPointerType(PointeeType));
   }
 
-  QualType ReturnTy = FT->getReturnType();
-  QualType OverloadReturnTy = ReturnTy;
-  if (ReturnTy->isPointerType() &&
-      !ReturnTy->getPointeeType().hasAddressSpace()) {
-    if (Sema->getLangOpts().OpenCL) {
-      NeedsNewDecl = true;
-
-      QualType ReturnPtTy = ReturnTy->getPointeeType();
-      unsigned BuiltinID = FDecl->getBuiltinID();
-      LangAS defClAS;
-
-      // __builtin_alloca* should always return pointer to stack/private
-      // Address Space, while for other builtins with return pointer type,
-      // it should depend on the OpenCL version.
-      switch (BuiltinID) {
-      case Builtin::BI__builtin_alloca_uninitialized:
-      case Builtin::BI__builtin_alloca:
-      case Builtin::BI__builtin_alloca_with_align_uninitialized:
-      case Builtin::BI__builtin_alloca_with_align: {
-        defClAS = LangAS::opencl_private;
-        break;
-      }
-      default: {
-        defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
-        break;
-      }
-      }
-
-      ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS);
-      OverloadReturnTy = Context.getPointerType(ReturnPtTy);
-    }
-  }
-
   if (!NeedsNewDecl)
     return nullptr;
 
   FunctionProtoType::ExtProtoInfo EPI;
   EPI.Variadic = FT->isVariadic();
-  QualType OverloadTy =
-      Context.getFunctionType(OverloadReturnTy, OverloadParams, EPI);
+  QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
+                                                OverloadParams, EPI);
   DeclContext *Parent = FDecl->getParent();
   FunctionDecl *OverloadDecl = FunctionDecl::Create(
       Context, Parent, FDecl->getLocation(), FDecl->getLocation(),
diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl
index 2df6bf3dba6a3..02f5e0de8d1b0 100644
--- a/clang/test/CodeGenOpenCL/builtins-alloca.cl
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -1,255 +1,146 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL12 %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL20 %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30 %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30-EXT %s
 
-// OPENCL12-LABEL: define dso_local void @test1(
-// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-// OPENCL12-NEXT:  [[ENTRY:.*:]]
-// OPENCL12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL12-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
-// OPENCL12-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL12-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL12-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
-// OPENCL12-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL12-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL12-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
-// OPENCL12-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL12-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL12-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
-// OPENCL12-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL12-NEXT:    ret void
+// OPENCL-LABEL: define dso_local void @test1_builtin_alloca(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL-NEXT:    ret void
 //
-// OPENCL20-LABEL: define dso_local void @test1(
-// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-// OPENCL20-NEXT:  [[ENTRY:.*:]]
-// OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL20-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
-// OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL20-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL20-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
-// OPENCL20-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL20-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL20-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
-// OPENCL20-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL20-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL20-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
-// OPENCL20-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL20-NEXT:    ret void
-//
-// OPENCL30-LABEL: define dso_local void @test1(
-// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-// OPENCL30-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL30-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
-// OPENCL30-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL30-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
-// OPENCL30-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL30-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL30-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
-// OPENCL30-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL30-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL30-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
-// OPENCL30-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL30-NEXT:    ret void
-//
-// OPENCL30-EXT-LABEL: define dso_local void @test1(
-// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-EXT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL30-EXT-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
-// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL30-EXT-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
-// OPENCL30-EXT-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL30-EXT-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
-// OPENCL30-EXT-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL30-EXT-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
-// OPENCL30-EXT-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL30-EXT-NEXT:    ret void
-//
-void test1(unsigned n) {
+void test1_builtin_alloca(unsigned n) {
     __private float* alloc_ptr = (__private float*)__builtin_alloca(n*sizeof(int));
+}
+
+// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_uninitialized(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL-NEXT:    ret void
+//
+void test1_builtin_alloca_uninitialized(unsigned n) {
     __private float* alloc_ptr_uninitialized = (__private float*)__builtin_alloca_uninitialized(n*sizeof(int));
+}
+
+// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_with_align(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL-NEXT:    ret void
+//
+void test1_builtin_alloca_with_align(unsigned n) {
     __private float* alloc_ptr_align = (__private float*)__builtin_alloca_with_align((n*sizeof(int)), 8);
+}
+
+// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL-NEXT:    ret void
+//
+void test1_builtin_alloca_with_align_uninitialized(unsigned n) {
     __private float* alloc_ptr_align_uninitialized = (__private float*)__builtin_alloca_with_align_uninitialized((n*sizeof(int)), 8);
 }
 
-// OPENCL12-LABEL: define dso_local void @test2(
-// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
-// OPENCL12-NEXT:  [[ENTRY:.*:]]
-// OPENCL12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL12-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL12-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL12-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL12-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL12-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL12-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL12-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL12-NEXT:    ret void
+// OPENCL-LABEL: define dso_local void @test2_builtin_alloca(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL-NEXT:    ret void
 //
-// OPENCL20-LABEL: define dso_local void @test2(
-// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
-// OPENCL20-NEXT:  [[ENTRY:.*:]]
-// OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL20-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL20-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL20-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL20-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL20-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL20-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL20-NEXT:    ret void
+void test2_builtin_alloca(unsigned n) {
+    __private void *alloc_ptr = __builtin_alloca(n);
+}
+
+// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_uninitialized(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL-NEXT:    ret void
 //
-// OPENCL30-LABEL: define dso_local void @test2(
-// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
-// OPENCL30-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL30-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL30-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL30-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL30-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL30-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL30-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL30-NEXT:    ret void
+void test2_builtin_alloca_uninitialized(unsigned n) {
+    __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n);
+}
+
+// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_with_align(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL-NEXT:    ret void
 //
-// OPENCL30-EXT-LABEL: define dso_local void @test2(
-// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
-// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-EXT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL30-EXT-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL30-EXT-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL30-EXT-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL30-EXT-NEXT:    ret void
+void test2_builtin_alloca_with_align(unsigned n) {
+    __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);
+}
+
+// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL-NEXT:    ret void
 //
-void test2(unsigned n) {
-    __private void *alloc_ptr = __builtin_alloca(n);
-    __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n);
-    __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);;
+void test2_builtin_alloca_with_align_uninitialized(unsigned n) {
     __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8);
 }
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// OPENCL12: {{.*}}
+// OPENCL20: {{.*}}
+// OPENCL30: {{.*}}
+// OPENCL30-EXT: {{.*}}

>From a1b8505aff13d1757b7df9cd95ac30e8204795eb Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Wed, 3 Jul 2024 15:15:17 +0530
Subject: [PATCH 4/9] Made some changes to the comments in
 OpenCLBuiltinAllocaAddrSpace().

---
 clang/lib/Sema/SemaChecking.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0a354c9bc42f7..f6f02a50c87bc 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1490,9 +1490,7 @@ static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
   if (S.getLangOpts().OpenCL) {
     RT = RT->getPointeeType();
 
-    // __builtin_alloca* should always return pointer to stack/private
-    // Address Space, while for other builtins with return pointer type,
-    // it should depend on the OpenCL version.
+    // Stack Address space corresponds to private address space.
     LangAS openCLStackAS = LangAS::opencl_private;
 
     RT = S.Context.getAddrSpaceQualType(RT, openCLStackAS);

>From e2ac64a9f2b6ba21c23ed702da3ba27620049479 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Wed, 10 Jul 2024 11:23:47 +0530
Subject: [PATCH 5/9] Removed non-OpenCL specific code snippet from
 OpenCLBuiltinAllocaAddrSpace().

---
 clang/lib/Sema/SemaChecking.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f6f02a50c87bc..27bc68f0598c5 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1480,9 +1480,6 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
 // In OpenCL, __builtin_alloca_* should return a pointer to address space
 // that corresponds to the stack address space i.e private address space.
 static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
-  S.Diag(TheCall->getBeginLoc(), diag::warn_alloca)
-      << TheCall->getDirectCallee();
-
   QualType RT = TheCall->getType();
   if (!RT->isPointerType() || RT->getPointeeType().hasAddressSpace())
     return true;
@@ -2231,6 +2228,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     [[fallthrough]];
   case Builtin::BI__builtin_alloca:
   case Builtin::BI__builtin_alloca_uninitialized:
+    Diag(TheCall->getBeginLoc(), diag::warn_alloca)
+        << TheCall->getDirectCallee();
     if (OpenCLBuiltinAllocaAddrSpace(*this, TheCall))
       return ExprError();
     break;

>From 3e2e427e1e0b67cac023ed9872a76bc79c550254 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Thu, 11 Jul 2024 13:56:13 +0530
Subject: [PATCH 6/9] Made suggested changes passing opencl_private directly.

---
 clang/lib/Sema/SemaChecking.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 27bc68f0598c5..31307c7fd73df 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1487,10 +1487,7 @@ static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
   if (S.getLangOpts().OpenCL) {
     RT = RT->getPointeeType();
 
-    // Stack Address space corresponds to private address space.
-    LangAS openCLStackAS = LangAS::opencl_private;
-
-    RT = S.Context.getAddrSpaceQualType(RT, openCLStackAS);
+    RT = S.Context.getAddrSpaceQualType(RT, LangAS::opencl_private);
     TheCall->setType(S.Context.getPointerType(RT));
   }
 

>From 9cae2ff8e8090946b70fb5adb2df4bb88e51d1fa Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 22 Jul 2024 12:52:36 +0530
Subject: [PATCH 7/9] Made the suggested changes around the
 builtinAllocaAddrSpace function.

---
 clang/lib/Sema/SemaChecking.cpp | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 31307c7fd73df..62372e0c9874f 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1479,17 +1479,14 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
 
 // In OpenCL, __builtin_alloca_* should return a pointer to address space
 // that corresponds to the stack address space i.e private address space.
-static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
+static bool builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
   QualType RT = TheCall->getType();
-  if (!RT->isPointerType() || RT->getPointeeType().hasAddressSpace())
-    return true;
-
-  if (S.getLangOpts().OpenCL) {
-    RT = RT->getPointeeType();
+  assert((RT->isPointerType() && !(RT->getPointeeType().hasAddressSpace())) &&
+         "__builtin_alloca has invalid address space");
 
-    RT = S.Context.getAddrSpaceQualType(RT, LangAS::opencl_private);
-    TheCall->setType(S.Context.getPointerType(RT));
-  }
+  RT = RT->getPointeeType();
+  RT = S.Context.getAddrSpaceQualType(RT, LangAS::opencl_private);
+  TheCall->setType(S.Context.getPointerType(RT));
 
   return false;
 }
@@ -2227,8 +2224,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_alloca_uninitialized:
     Diag(TheCall->getBeginLoc(), diag::warn_alloca)
         << TheCall->getDirectCallee();
-    if (OpenCLBuiltinAllocaAddrSpace(*this, TheCall))
-      return ExprError();
+    if (getLangOpts().OpenCL) {
+      if (builtinAllocaAddrSpace(*this, TheCall))
+        return ExprError();
+    }
     break;
   case Builtin::BI__arithmetic_fence:
     if (BuiltinArithmeticFence(TheCall))

>From 4811263911002969ca8bfdeb4e7531e3b45da608 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Tue, 23 Jul 2024 11:47:54 +0530
Subject: [PATCH 8/9] Modified return type of builtinAllocaAddrSpace function.

---
 clang/lib/Sema/SemaChecking.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 62372e0c9874f..f40900050aa10 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1479,7 +1479,7 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
 
 // In OpenCL, __builtin_alloca_* should return a pointer to address space
 // that corresponds to the stack address space i.e private address space.
-static bool builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
+static void builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
   QualType RT = TheCall->getType();
   assert((RT->isPointerType() && !(RT->getPointeeType().hasAddressSpace())) &&
          "__builtin_alloca has invalid address space");
@@ -1487,8 +1487,6 @@ static bool builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
   RT = RT->getPointeeType();
   RT = S.Context.getAddrSpaceQualType(RT, LangAS::opencl_private);
   TheCall->setType(S.Context.getPointerType(RT));
-
-  return false;
 }
 
 namespace {
@@ -2225,8 +2223,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     Diag(TheCall->getBeginLoc(), diag::warn_alloca)
         << TheCall->getDirectCallee();
     if (getLangOpts().OpenCL) {
-      if (builtinAllocaAddrSpace(*this, TheCall))
-        return ExprError();
+      builtinAllocaAddrSpace(*this, TheCall);
     }
     break;
   case Builtin::BI__arithmetic_fence:

>From c4145d272482187a1a2383c85eee1dcb23a742b8 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Fri, 26 Jul 2024 11:16:39 +0530
Subject: [PATCH 9/9] Removed unused version specific check-prefixes from
 builtins-alloca.cl test.

---
 clang/test/CodeGenOpenCL/builtins-alloca.cl | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl
index 02f5e0de8d1b0..474e95e74e006 100644
--- a/clang/test/CodeGenOpenCL/builtins-alloca.cl
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -1,12 +1,12 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL12 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL20 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30-EXT %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s
 
 // OPENCL-LABEL: define dso_local void @test1_builtin_alloca(
 // OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -139,8 +139,3 @@ void test2_builtin_alloca_with_align(unsigned n) {
 void test2_builtin_alloca_with_align_uninitialized(unsigned n) {
     __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8);
 }
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// OPENCL12: {{.*}}
-// OPENCL20: {{.*}}
-// OPENCL30: {{.*}}
-// OPENCL30-EXT: {{.*}}



More information about the cfe-commits mailing list