[clang] [Clang] [WIP] Added builtin_alloca right Address Space for OpenCL (PR #95750)

Vikash Gupta via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 3 02:43:19 PDT 2024


https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/95750

>From cbe656fa6db50319e74c0fab166538518506974e Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 17 Jun 2024 11:20:02 +0530
Subject: [PATCH 1/4] [Clang] [WIP] Added builtin_alloca support for OpenCL1.2
 and below

__builtin_alloca was returning a flat pointer with no address space
when compiled with OpenCL 1.2 or below, but worked fine with
OpenCL 2.0 and above. This is because the latter uses the concept of
a generic address space, which supports casts to other address spaces
(i.e. to the private address space, which is used for stack
allocation).

So, in the case of OpenCL 1.2 and below, __builtin_alloca is supposed
to return a pointer to the private address space, which eliminates the
need for a cast that is not supported there. Thus, it requires
redefinition of the builtin function so that its return pointer type
points to the appropriate address space.
---
 clang/lib/Sema/SemaExpr.cpp                 | 23 +++++-
 clang/test/CodeGenOpenCL/builtins-alloca.cl | 86 +++++++++++++++++++++
 clang/test/CodeGenOpenCL/memcpy.cl          |  0
 3 files changed, 106 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-alloca.cl
 mode change 100644 => 100755 clang/test/CodeGenOpenCL/memcpy.cl

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 4db8b4130c3c7..bb63020dadb83 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6231,7 +6231,10 @@ bool Sema::CheckArgsForPlaceholders(MultiExprArg args) {
 ///                  it does not contain any pointer arguments without
 ///                  an address space qualifer.  Otherwise the rewritten
 ///                  FunctionDecl is returned.
-/// TODO: Handle pointer return types.
+///
+/// Pointer return type with no explicit address space is assigned the
+/// default address space where pointer points to based on the language
+/// option used to compile it.
 static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
                                                 FunctionDecl *FDecl,
                                                 MultiExprArg ArgExprs) {
@@ -6275,13 +6278,27 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
     OverloadParams.push_back(Context.getPointerType(PointeeType));
   }
 
+  QualType ReturnTy = FT->getReturnType();
+  QualType OverloadReturnTy = ReturnTy;
+  if (ReturnTy->isPointerType() &&
+      !ReturnTy->getPointeeType().hasAddressSpace()) {
+    if (Sema->getLangOpts().OpenCL) {
+      NeedsNewDecl = true;
+
+      QualType ReturnPtTy = ReturnTy->getPointeeType();
+      LangAS defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
+      ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS);
+      OverloadReturnTy = Context.getPointerType(ReturnPtTy);
+    }
+  }
+
   if (!NeedsNewDecl)
     return nullptr;
 
   FunctionProtoType::ExtProtoInfo EPI;
   EPI.Variadic = FT->isVariadic();
-  QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
-                                                OverloadParams, EPI);
+  QualType OverloadTy =
+      Context.getFunctionType(OverloadReturnTy, OverloadParams, EPI);
   DeclContext *Parent = FDecl->getParent();
   FunctionDecl *OverloadDecl = FunctionDecl::Create(
       Context, Parent, FDecl->getLocation(), FDecl->getLocation(),
diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl
new file mode 100644
index 0000000000000..74a86955f2e4f
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
+
+// OPENCL12-LABEL: define dso_local ptr addrspace(5) @test1(
+// OPENCL12-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    ret ptr addrspace(5) [[TMP1]]
+//
+// OPENCL20-LABEL: define dso_local ptr @test1(
+// OPENCL20-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL20-NEXT:  [[ENTRY:.*:]]
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL20-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL20-NEXT:    ret ptr [[TMP2]]
+//
+// OPENCL30-LABEL: define dso_local ptr addrspace(5) @test1(
+// OPENCL30-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    ret ptr addrspace(5) [[TMP1]]
+//
+// OPENCL30-EXT-LABEL: define dso_local ptr @test1(
+// OPENCL30-EXT-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    ret ptr [[TMP2]]
+//
+float* test1() {
+    float* alloc_ptr = (float*)__builtin_alloca(32 * sizeof(int));
+    return alloc_ptr;
+}
+
+// OPENCL12-LABEL: define dso_local void @test2(
+// OPENCL12-SAME: ) #[[ATTR0]] {
+// OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    ret void
+//
+// OPENCL20-LABEL: define dso_local void @test2(
+// OPENCL20-SAME: ) #[[ATTR0]] {
+// OPENCL20-NEXT:  [[ENTRY:.*:]]
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL20-NEXT:    ret void
+//
+// OPENCL30-LABEL: define dso_local void @test2(
+// OPENCL30-SAME: ) #[[ATTR0]] {
+// OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    ret void
+//
+// OPENCL30-EXT-LABEL: define dso_local void @test2(
+// OPENCL30-EXT-SAME: ) #[[ATTR0]] {
+// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    ret void
+//
+void test2() {
+    void *alloc_ptr = __builtin_alloca(28);
+}
diff --git a/clang/test/CodeGenOpenCL/memcpy.cl b/clang/test/CodeGenOpenCL/memcpy.cl
old mode 100644
new mode 100755

>From 1dbb2d52d73ec1e299ceae60fb6ad443fc9833c7 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 17 Jun 2024 16:23:03 +0530
Subject: [PATCH 2/4] updated return pointer to always point to stack/private
 address space for builtin alloca variants.

---
 clang/lib/Sema/SemaExpr.cpp                 |  21 +-
 clang/test/CodeGenOpenCL/builtins-alloca.cl | 275 ++++++++++++++++----
 clang/test/CodeGenOpenCL/memcpy.cl          |   0
 3 files changed, 242 insertions(+), 54 deletions(-)
 mode change 100755 => 100644 clang/test/CodeGenOpenCL/memcpy.cl

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index bb63020dadb83..299d5222a06ca 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6286,7 +6286,26 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
       NeedsNewDecl = true;
 
       QualType ReturnPtTy = ReturnTy->getPointeeType();
-      LangAS defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
+      unsigned BuiltinID = FDecl->getBuiltinID();
+      LangAS defClAS;
+
+      // __builtin_alloca* should always return pointer to stack/private
+      // Address Space, while for other builtins with return pointer type,
+      // it should depend on the OpenCL version.
+      switch (BuiltinID) {
+      case Builtin::BI__builtin_alloca_uninitialized:
+      case Builtin::BI__builtin_alloca:
+      case Builtin::BI__builtin_alloca_with_align_uninitialized:
+      case Builtin::BI__builtin_alloca_with_align: {
+        defClAS = LangAS::opencl_private;
+        break;
+      }
+      default: {
+        defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
+        break;
+      }
+      }
+
       ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS);
       OverloadReturnTy = Context.getPointerType(ReturnPtTy);
     }
diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl
index 74a86955f2e4f..2df6bf3dba6a3 100644
--- a/clang/test/CodeGenOpenCL/builtins-alloca.cl
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -1,86 +1,255 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
+// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \
+// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \
+// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \
+// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
 
-// OPENCL12-LABEL: define dso_local ptr addrspace(5) @test1(
-// OPENCL12-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL12-LABEL: define dso_local void @test1(
+// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 // OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL12-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL12-NEXT:    ret ptr addrspace(5) [[TMP1]]
+// OPENCL12-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL12-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL12-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL12-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
+// OPENCL12-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL12-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL12-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
+// OPENCL12-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL12-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL12-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
+// OPENCL12-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL12-NEXT:    ret void
 //
-// OPENCL20-LABEL: define dso_local ptr @test1(
-// OPENCL20-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL20-LABEL: define dso_local void @test1(
+// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 // OPENCL20-NEXT:  [[ENTRY:.*:]]
-// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
-// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
-// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
-// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
-// OPENCL20-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
-// OPENCL20-NEXT:    ret ptr [[TMP2]]
+// OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL20-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL20-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL20-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
+// OPENCL20-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL20-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL20-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
+// OPENCL20-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL20-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL20-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
+// OPENCL20-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL20-NEXT:    ret void
 //
-// OPENCL30-LABEL: define dso_local ptr addrspace(5) @test1(
-// OPENCL30-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-LABEL: define dso_local void @test1(
+// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 // OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-NEXT:    ret ptr addrspace(5) [[TMP1]]
+// OPENCL30-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL30-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL30-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL30-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
+// OPENCL30-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL30-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL30-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
+// OPENCL30-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL30-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL30-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
+// OPENCL30-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL30-NEXT:    ret void
 //
-// OPENCL30-EXT-LABEL: define dso_local ptr @test1(
-// OPENCL30-EXT-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-EXT-LABEL: define dso_local void @test1(
+// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 // OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
-// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
-// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
-// OPENCL30-EXT-NEXT:    ret ptr [[TMP2]]
+// OPENCL30-EXT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL30-EXT-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL30-EXT-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
+// OPENCL30-EXT-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL30-EXT-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
+// OPENCL30-EXT-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL30-EXT-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
+// OPENCL30-EXT-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL30-EXT-NEXT:    ret void
 //
-float* test1() {
-    float* alloc_ptr = (float*)__builtin_alloca(32 * sizeof(int));
-    return alloc_ptr;
+void test1(unsigned n) {
+    __private float* alloc_ptr = (__private float*)__builtin_alloca(n*sizeof(int));
+    __private float* alloc_ptr_uninitialized = (__private float*)__builtin_alloca_uninitialized(n*sizeof(int));
+    __private float* alloc_ptr_align = (__private float*)__builtin_alloca_with_align((n*sizeof(int)), 8);
+    __private float* alloc_ptr_align_uninitialized = (__private float*)__builtin_alloca_with_align_uninitialized((n*sizeof(int)), 8);
 }
 
 // OPENCL12-LABEL: define dso_local void @test2(
-// OPENCL12-SAME: ) #[[ATTR0]] {
+// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
 // OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL12-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL12-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL12-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL12-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL12-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL12-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL12-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL12-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL12-NEXT:    ret void
 //
 // OPENCL20-LABEL: define dso_local void @test2(
-// OPENCL20-SAME: ) #[[ATTR0]] {
+// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
 // OPENCL20-NEXT:  [[ENTRY:.*:]]
-// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
-// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
-// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
-// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL20-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL20-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL20-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL20-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL20-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL20-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL20-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL20-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void @test2(
-// OPENCL30-SAME: ) #[[ATTR0]] {
+// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
 // OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL30-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL30-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL30-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL30-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL30-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL30-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL30-NEXT:    ret void
 //
 // OPENCL30-EXT-LABEL: define dso_local void @test2(
-// OPENCL30-EXT-SAME: ) #[[ATTR0]] {
+// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
 // OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
-// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL30-EXT-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// OPENCL30-EXT-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
+// OPENCL30-EXT-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL30-EXT-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL30-EXT-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
+// OPENCL30-EXT-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
+// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL30-EXT-NEXT:    ret void
 //
-void test2() {
-    void *alloc_ptr = __builtin_alloca(28);
+void test2(unsigned n) {
+    __private void *alloc_ptr = __builtin_alloca(n);
+    __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n);
+    __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);;
+    __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8);
 }
diff --git a/clang/test/CodeGenOpenCL/memcpy.cl b/clang/test/CodeGenOpenCL/memcpy.cl
old mode 100755
new mode 100644

>From 726bfb0c06009c2dd330b2890a52d6503271c170 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Wed, 3 Jul 2024 12:36:33 +0530
Subject: [PATCH 3/4] Moved the patch to dedicated builtin function in
 SemaChecking.cpp & refactored LIT test.

---
 clang/lib/Sema/SemaChecking.cpp             |  29 +-
 clang/lib/Sema/SemaExpr.cpp                 |  42 +--
 clang/test/CodeGenOpenCL/builtins-alloca.cl | 363 +++++++-------------
 3 files changed, 157 insertions(+), 277 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 07cd0727eb3f4..6ab1ce2ea11fa 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1981,6 +1981,31 @@ static bool OpenCLBuiltinToAddr(Sema &S, unsigned BuiltinID, CallExpr *Call) {
   return false;
 }
 
+// In OpenCL, the __builtin_alloca* builtins should return a pointer into the
+// address space used for the stack, i.e. the private address space.
+static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
+  S.Diag(TheCall->getBeginLoc(), diag::warn_alloca)
+      << TheCall->getDirectCallee();
+
+  QualType RT = TheCall->getType();
+  if (!RT->isPointerType() || RT->getPointeeType().hasAddressSpace())
+    return true;
+
+  if (S.getLangOpts().OpenCL) {
+    RT = RT->getPointeeType();
+
+    // __builtin_alloca* should always return pointer to stack/private
+    // Address Space, while for other builtins with return pointer type,
+    // it should depend on the OpenCL version.
+    LangAS openCLStackAS = LangAS::opencl_private;
+
+    RT = S.Context.getAddrSpaceQualType(RT, openCLStackAS);
+    TheCall->setType(S.Context.getPointerType(RT));
+  }
+
+  return false;
+}
+
 namespace {
 enum PointerAuthOpKind {
   PAO_Strip,
@@ -2606,8 +2631,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     [[fallthrough]];
   case Builtin::BI__builtin_alloca:
   case Builtin::BI__builtin_alloca_uninitialized:
-    Diag(TheCall->getBeginLoc(), diag::warn_alloca)
-        << TheCall->getDirectCallee();
+    if (OpenCLBuiltinAllocaAddrSpace(*this, TheCall))
+      return ExprError();
     break;
   case Builtin::BI__arithmetic_fence:
     if (BuiltinArithmeticFence(TheCall))
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 299d5222a06ca..4db8b4130c3c7 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6231,10 +6231,7 @@ bool Sema::CheckArgsForPlaceholders(MultiExprArg args) {
 ///                  it does not contain any pointer arguments without
 ///                  an address space qualifer.  Otherwise the rewritten
 ///                  FunctionDecl is returned.
-///
-/// Pointer return type with no explicit address space is assigned the
-/// default address space where pointer points to based on the language
-/// option used to compile it.
+/// TODO: Handle pointer return types.
 static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
                                                 FunctionDecl *FDecl,
                                                 MultiExprArg ArgExprs) {
@@ -6278,46 +6275,13 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
     OverloadParams.push_back(Context.getPointerType(PointeeType));
   }
 
-  QualType ReturnTy = FT->getReturnType();
-  QualType OverloadReturnTy = ReturnTy;
-  if (ReturnTy->isPointerType() &&
-      !ReturnTy->getPointeeType().hasAddressSpace()) {
-    if (Sema->getLangOpts().OpenCL) {
-      NeedsNewDecl = true;
-
-      QualType ReturnPtTy = ReturnTy->getPointeeType();
-      unsigned BuiltinID = FDecl->getBuiltinID();
-      LangAS defClAS;
-
-      // __builtin_alloca* should always return pointer to stack/private
-      // Address Space, while for other builtins with return pointer type,
-      // it should depend on the OpenCL version.
-      switch (BuiltinID) {
-      case Builtin::BI__builtin_alloca_uninitialized:
-      case Builtin::BI__builtin_alloca:
-      case Builtin::BI__builtin_alloca_with_align_uninitialized:
-      case Builtin::BI__builtin_alloca_with_align: {
-        defClAS = LangAS::opencl_private;
-        break;
-      }
-      default: {
-        defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
-        break;
-      }
-      }
-
-      ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS);
-      OverloadReturnTy = Context.getPointerType(ReturnPtTy);
-    }
-  }
-
   if (!NeedsNewDecl)
     return nullptr;
 
   FunctionProtoType::ExtProtoInfo EPI;
   EPI.Variadic = FT->isVariadic();
-  QualType OverloadTy =
-      Context.getFunctionType(OverloadReturnTy, OverloadParams, EPI);
+  QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
+                                                OverloadParams, EPI);
   DeclContext *Parent = FDecl->getParent();
   FunctionDecl *OverloadDecl = FunctionDecl::Create(
       Context, Parent, FDecl->getLocation(), FDecl->getLocation(),
diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl
index 2df6bf3dba6a3..02f5e0de8d1b0 100644
--- a/clang/test/CodeGenOpenCL/builtins-alloca.cl
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -1,255 +1,146 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL12 %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL20 %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30 %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \
-// RUN:     -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
+// RUN:     -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30-EXT %s
 
-// OPENCL12-LABEL: define dso_local void @test1(
-// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-// OPENCL12-NEXT:  [[ENTRY:.*:]]
-// OPENCL12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL12-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
-// OPENCL12-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL12-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL12-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
-// OPENCL12-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL12-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL12-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
-// OPENCL12-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL12-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL12-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
-// OPENCL12-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL12-NEXT:    ret void
+// OPENCL-LABEL: define dso_local void @test1_builtin_alloca(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL-NEXT:    ret void
 //
-// OPENCL20-LABEL: define dso_local void @test1(
-// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-// OPENCL20-NEXT:  [[ENTRY:.*:]]
-// OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL20-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
-// OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL20-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL20-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
-// OPENCL20-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL20-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL20-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
-// OPENCL20-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL20-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL20-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
-// OPENCL20-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL20-NEXT:    ret void
-//
-// OPENCL30-LABEL: define dso_local void @test1(
-// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-// OPENCL30-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL30-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
-// OPENCL30-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL30-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
-// OPENCL30-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL30-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL30-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
-// OPENCL30-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL30-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL30-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
-// OPENCL30-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL30-NEXT:    ret void
-//
-// OPENCL30-EXT-LABEL: define dso_local void @test1(
-// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-EXT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL30-EXT-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
-// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL30-EXT-NEXT:    [[MUL2:%.*]] = mul i64 [[CONV1]], 4
-// OPENCL30-EXT-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL30-EXT-NEXT:    [[MUL4:%.*]] = mul i64 [[CONV3]], 4
-// OPENCL30-EXT-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV5:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL30-EXT-NEXT:    [[MUL6:%.*]] = mul i64 [[CONV5]], 4
-// OPENCL30-EXT-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL30-EXT-NEXT:    ret void
-//
-void test1(unsigned n) {
+void test1_builtin_alloca(unsigned n) {
     __private float* alloc_ptr = (__private float*)__builtin_alloca(n*sizeof(int));
+}
+
+// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_uninitialized(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL-NEXT:    ret void
+//
+void test1_builtin_alloca_uninitialized(unsigned n) {
     __private float* alloc_ptr_uninitialized = (__private float*)__builtin_alloca_uninitialized(n*sizeof(int));
+}
+
+// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_with_align(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL-NEXT:    ret void
+//
+void test1_builtin_alloca_with_align(unsigned n) {
     __private float* alloc_ptr_align = (__private float*)__builtin_alloca_with_align((n*sizeof(int)), 8);
+}
+
+// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL-NEXT:    ret void
+//
+void test1_builtin_alloca_with_align_uninitialized(unsigned n) {
     __private float* alloc_ptr_align_uninitialized = (__private float*)__builtin_alloca_with_align_uninitialized((n*sizeof(int)), 8);
 }
 
-// OPENCL12-LABEL: define dso_local void @test2(
-// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
-// OPENCL12-NEXT:  [[ENTRY:.*:]]
-// OPENCL12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL12-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL12-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL12-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL12-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL12-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL12-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL12-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL12-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL12-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
-// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL12-NEXT:    ret void
+// OPENCL-LABEL: define dso_local void @test2_builtin_alloca(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
+// OPENCL-NEXT:    ret void
 //
-// OPENCL20-LABEL: define dso_local void @test2(
-// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
-// OPENCL20-NEXT:  [[ENTRY:.*:]]
-// OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL20-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL20-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL20-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL20-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL20-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL20-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL20-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL20-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL20-NEXT:    ret void
+void test2_builtin_alloca(unsigned n) {
+    __private void *alloc_ptr = __builtin_alloca(n);
+}
+
+// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_uninitialized(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
+// OPENCL-NEXT:    ret void
 //
-// OPENCL30-LABEL: define dso_local void @test2(
-// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
-// OPENCL30-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL30-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL30-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL30-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL30-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL30-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL30-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
-// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL30-NEXT:    ret void
+void test2_builtin_alloca_uninitialized(unsigned n) {
+    __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n);
+}
+
+// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_with_align(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
+// OPENCL-NEXT:    ret void
 //
-// OPENCL30-EXT-LABEL: define dso_local void @test2(
-// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
-// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
-// OPENCL30-EXT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// OPENCL30-EXT-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
-// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
-// OPENCL30-EXT-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV2:%.*]] = zext i32 [[TMP4]] to i64
-// OPENCL30-EXT-NEXT:    [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
-// OPENCL30-EXT-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
-// OPENCL30-EXT-NEXT:    [[CONV3:%.*]] = zext i32 [[TMP6]] to i64
-// OPENCL30-EXT-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5)
-// OPENCL30-EXT-NEXT:    store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
-// OPENCL30-EXT-NEXT:    ret void
+void test2_builtin_alloca_with_align(unsigned n) {
+    __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);
+}
+
+// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized(
+// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// OPENCL-NEXT:  [[ENTRY:.*:]]
+// OPENCL-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// OPENCL-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// OPENCL-NEXT:    store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// OPENCL-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// OPENCL-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
+// OPENCL-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
+// OPENCL-NEXT:    ret void
 //
-void test2(unsigned n) {
-    __private void *alloc_ptr = __builtin_alloca(n);
-    __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n);
-    __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);;
+void test2_builtin_alloca_with_align_uninitialized(unsigned n) {
     __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8);
 }
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// OPENCL12: {{.*}}
+// OPENCL20: {{.*}}
+// OPENCL30: {{.*}}
+// OPENCL30-EXT: {{.*}}

>From f4ae5f5406789c2104eb0286115b8de312e5c688 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Wed, 3 Jul 2024 15:15:17 +0530
Subject: [PATCH 4/4] Made some changes in comments in
 OpenCLBuiltinAllocaAddrSpace().

---
 clang/lib/Sema/SemaChecking.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 6ab1ce2ea11fa..c5dca0a6a697a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1994,9 +1994,7 @@ static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
   if (S.getLangOpts().OpenCL) {
     RT = RT->getPointeeType();
 
-    // __builtin_alloca* should always return pointer to stack/private
-    // Address Space, while for other builtins with return pointer type,
-    // it should depend on the OpenCL version.
+    // The stack address space corresponds to the private address space.
     LangAS openCLStackAS = LangAS::opencl_private;
 
     RT = S.Context.getAddrSpaceQualType(RT, openCLStackAS);



More information about the cfe-commits mailing list