[clang] 3b9a85d - [CFE][Codegen] Make sure to maintain the contiguity of all the static allocas
via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 9 19:16:06 PST 2021
Author: hsmahesha
Date: 2021-11-10T08:45:21+05:30
New Revision: 3b9a85d10ac7a073c95b35adca379281ac6ecbcb
URL: https://github.com/llvm/llvm-project/commit/3b9a85d10ac7a073c95b35adca379281ac6ecbcb
DIFF: https://github.com/llvm/llvm-project/commit/3b9a85d10ac7a073c95b35adca379281ac6ecbcb.diff
LOG: [CFE][Codegen] Make sure to maintain the contiguity of all the static allocas
at the start of the entry block, which in turn would aid better code transformation/optimization.
Reviewed By: rnk
Differential Revision: https://reviews.llvm.org/D110257
Added:
Modified:
clang/lib/CodeGen/CGExpr.cpp
clang/lib/CodeGen/CodeGenFunction.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/CodeGenCUDA/builtins-amdgcn.cu
clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
clang/test/CodeGenCXX/amdgcn-func-arg.cpp
clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
clang/test/CodeGenCXX/vla.cpp
clang/test/CodeGenSYCL/address-space-deduction.cpp
clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 0d83d6a0d9665..4332e74dbb244 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -95,7 +95,7 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
// otherwise alloca is inserted at the current insertion point of the
// builder.
if (!ArraySize)
- Builder.SetInsertPoint(AllocaInsertPt);
+ Builder.SetInsertPoint(getPostAllocaInsertPoint());
V = getTargetHooks().performAddrSpaceCast(
*this, V, getASTAllocaAddressSpace(), LangAS::Default,
Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index c3c2899e93ec7..d87cf2d497209 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -424,6 +424,14 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
AllocaInsertPt = nullptr;
Ptr->eraseFromParent();
+ // PostAllocaInsertPt, if created, was lazily created when it was required,
+ // remove it now since it was just created for our own convenience.
+ if (PostAllocaInsertPt) {
+ llvm::Instruction *PostPtr = PostAllocaInsertPt;
+ PostAllocaInsertPt = nullptr;
+ PostPtr->eraseFromParent();
+ }
+
// If someone took the address of a label but never did an indirect goto, we
// made a zero entry PHI node, which is illegal, zap it now.
if (IndirectBranch) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index dd60e21b0ce18..e8207511813c4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -379,6 +379,35 @@ class CodeGenFunction : public CodeGenTypeCache {
/// we prefer to insert allocas.
llvm::AssertingVH<llvm::Instruction> AllocaInsertPt;
+private:
+ /// PostAllocaInsertPt - This is a place in the prologue where code can be
+ /// inserted that will be dominated by all the static allocas. This helps
+ /// achieve two things:
+ /// 1. Contiguity of all static allocas (within the prologue) is maintained.
+ /// 2. All other prologue code (which are dominated by static allocas) do
+ /// appear in the source order immediately after all static allocas.
+ ///
+ /// PostAllocaInsertPt will be lazily created when it is *really* required.
+ llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
+
+public:
+ /// Return PostAllocaInsertPt. If it is not yet created, then insert it
+ /// immediately after AllocaInsertPt.
+ llvm::Instruction *getPostAllocaInsertPoint() {
+ if (!PostAllocaInsertPt) {
+ assert(AllocaInsertPt &&
+ "Expected static alloca insertion point at function prologue");
+ auto *EBB = AllocaInsertPt->getParent();
+ assert(EBB->isEntryBlock() &&
+ "EBB should be entry block of the current code gen function");
+ PostAllocaInsertPt = AllocaInsertPt->clone();
+ PostAllocaInsertPt->setName("postallocapt");
+ PostAllocaInsertPt->insertAfter(AllocaInsertPt);
+ }
+
+ return PostAllocaInsertPt;
+ }
+
/// API for captured statement code generation.
class CGCapturedStmtInfo {
public:
diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
index 6ba606b4e82c2..e1054d7d3bfba 100644
--- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -12,10 +12,10 @@
// CHECK-LABEL: @_Z16use_dispatch_ptrPi(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT]] to i32**
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT_ADDR]] to i32**
// CHECK-NEXT: [[DISPATCH_PTR:%.*]] = alloca i32*, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT]] to i32**
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT_ADDR]] to i32**
// CHECK-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[DISPATCH_PTR]] to i32**
// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32 addrspace(1)* [[OUT_COERCE:%.*]] to i32*
// CHECK-NEXT: store i32* [[TMP0]], i32** [[OUT_ASCAST]], align 8
@@ -36,19 +36,20 @@ __global__ void use_dispatch_ptr(int* out) {
}
__global__
-// CHECK-LABEL: @_Z12test_ds_fmaxf(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
-// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
-// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
-// CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4
-// CHECK-NEXT: ret void
-//
-void test_ds_fmax(float src) {
+ // CHECK-LABEL: @_Z12test_ds_fmaxf(
+ // CHECK-NEXT: entry:
+ // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+ // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
+ // CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
+ // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
+ // CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4
+ // CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4
+ // CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
+ // CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4
+ // CHECK-NEXT: ret void
+ //
+ void
+ test_ds_fmax(float src) {
__shared__ float shared;
volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);
}
@@ -56,8 +57,8 @@ void test_ds_fmax(float src) {
// CHECK-LABEL: @_Z12test_ds_faddf(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4
@@ -73,12 +74,12 @@ __global__ void test_ds_fadd(float src) {
// CHECK-LABEL: @_Z12test_ds_fminfPf(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5)
-// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float**
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5)
-// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float**
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float**
+// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
+// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float**
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float*
// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8
@@ -123,12 +124,12 @@ __global__ void endpgm() {
// CHECK-LABEL: @_Z14test_uicmp_i64Pyyy(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64**
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64**
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[A_ADDR]] to i64*
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64**
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64**
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[A_ADDR]] to i64*
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[B_ADDR]] to i64*
// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64*
// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8
@@ -153,8 +154,8 @@ __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, un
// CHECK-LABEL: @_Z14test_s_memtimePy(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64**
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64**
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64**
// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64*
// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8
@@ -176,12 +177,12 @@ __device__ void func(float *x);
// CHECK-LABEL: @_Z17test_ds_fmin_funcfPf(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5)
-// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float**
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5)
-// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float**
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float**
+// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
+// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float**
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float*
// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8
@@ -202,14 +203,13 @@ __global__ void test_ds_fmin_func(float src, float *__restrict shared) {
func(shared);
}
-
// CHECK-LABEL: @_Z14test_is_sharedPf(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[X:%.*]] = alloca float*, align 8, addrspace(5)
-// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X]] to float**
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca float*, align 8, addrspace(5)
-// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X_ADDR]] to float**
// CHECK-NEXT: [[RET:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X]] to float**
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X_ADDR]] to float**
// CHECK-NEXT: [[RET_ASCAST:%.*]] = addrspacecast i8 addrspace(5)* [[RET]] to i8*
// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[X_COERCE:%.*]] to float*
// CHECK-NEXT: store float* [[TMP0]], float** [[X_ASCAST]], align 8
diff --git a/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp b/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
index 16eb318cd4f60..e5fc67a6871b6 100644
--- a/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
@@ -17,16 +17,16 @@ void func1(int *x) {
// CHECK-LABEL: @_Z5func2v(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV1]] to i32*
// CHECK-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV2]] to i32*
// CHECK-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5)
-// CHECK-NEXT: [[LA_ASCAST:%.*]] = addrspacecast [100 x i32] addrspace(5)* [[LA]] to [100 x i32]*
// CHECK-NEXT: [[LP1:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP1]] to i32**
// CHECK-NEXT: [[LP2:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP2]] to i32**
// CHECK-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV1]] to i32*
+// CHECK-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV2]] to i32*
+// CHECK-NEXT: [[LA_ASCAST:%.*]] = addrspacecast [100 x i32] addrspace(5)* [[LA]] to [100 x i32]*
+// CHECK-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP1]] to i32**
+// CHECK-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP2]] to i32**
// CHECK-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LVC]] to i32*
// CHECK-NEXT: store i32 1, i32* [[LV1_ASCAST]], align 4
// CHECK-NEXT: store i32 2, i32* [[LV2_ASCAST]], align 4
diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
index 782ce83327500..af82a028e332c 100644
--- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -33,8 +33,8 @@ void func_with_indirect_arg(A a) {
// CHECK-LABEL: @_Z22test_indirect_arg_autov(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A:%.*]] = alloca [[CLASS_A:%.*]], align 4, addrspace(5)
-// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[A]] to %class.A*
// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_A]], align 4, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[A]] to %class.A*
// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[AGG_TMP]] to %class.A*
// CHECK-NEXT: call void @_ZN1AC1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]])
// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.A* [[AGG_TMP_ASCAST]] to i8*
@@ -85,8 +85,8 @@ void func_with_byval_arg(B b) {
// CHECK-LABEL: @_Z19test_byval_arg_autov(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[B:%.*]] = alloca [[CLASS_B:%.*]], align 4, addrspace(5)
-// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[B]] to %class.B*
// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_B]], align 4, addrspace(5)
+// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[B]] to %class.B*
// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[AGG_TMP]] to %class.B*
// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.B* [[AGG_TMP_ASCAST]] to i8*
// CHECK-NEXT: [[TMP1:%.*]] = bitcast %class.B* [[B_ASCAST]] to i8*
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
index e9a2cdd2ac7b6..8703fec7b13a2 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -6,8 +6,8 @@
// CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32**
// CHECK-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32**
// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32*
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
@@ -32,8 +32,8 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr)
// CHECK-LABEL: @_Z29test_non_volatile_parameter64Py(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64**
// CHECK-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64**
// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64*
// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
@@ -58,8 +58,8 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr)
// CHECK-LABEL: @_Z25test_volatile_parameter32PVj(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32**
// CHECK-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32**
// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32*
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
@@ -84,8 +84,8 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__
// CHECK-LABEL: @_Z25test_volatile_parameter64PVy(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64**
// CHECK-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64**
// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64*
// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
diff --git a/clang/test/CodeGenCXX/vla.cpp b/clang/test/CodeGenCXX/vla.cpp
index 858fd40d1edbd..d50fa973fe3de 100644
--- a/clang/test/CodeGenCXX/vla.cpp
+++ b/clang/test/CodeGenCXX/vla.cpp
@@ -18,19 +18,19 @@ int f() {
// rdar://problem/9506377
void test0(void *array, int n) {
// CHECK-LABEL: define{{.*}} void @_Z5test0Pvi(
- // X64: [[ARRAY:%.*]] = alloca i8*, align 8
// AMDGCN: [[ARRAY0:%.*]] = alloca i8*, align 8, addrspace(5)
+ // AMDGCN-NEXT: [[N0:%.*]] = alloca i32, align 4, addrspace(5)
+ // AMDGCN-NEXT: [[REF0:%.*]] = alloca i16*, align 8, addrspace(5)
+ // AMDGCN-NEXT: [[S0:%.*]] = alloca i16, align 2, addrspace(5)
// AMDGCN-NEXT: [[ARRAY:%.*]] = addrspacecast i8* addrspace(5)* [[ARRAY0]] to i8**
- // X64-NEXT: [[N:%.*]] = alloca i32, align 4
- // AMDGCN: [[N0:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[N:%.*]] = addrspacecast i32 addrspace(5)* [[N0]] to i32*
- // X64-NEXT: [[REF:%.*]] = alloca i16*, align 8
- // AMDGCN: [[REF0:%.*]] = alloca i16*, align 8, addrspace(5)
// AMDGCN-NEXT: [[REF:%.*]] = addrspacecast i16* addrspace(5)* [[REF0]] to i16**
- // X64-NEXT: [[S:%.*]] = alloca i16, align 2
- // AMDGCN: [[S0:%.*]] = alloca i16, align 2, addrspace(5)
// AMDGCN-NEXT: [[S:%.*]] = addrspacecast i16 addrspace(5)* [[S0]] to i16*
- // CHECK-NEXT: store i8*
+ // X64: [[ARRAY:%.*]] = alloca i8*, align 8
+ // X64-NEXT: [[N:%.*]] = alloca i32, align 4
+ // X64-NEXT: [[REF:%.*]] = alloca i16*, align 8
+ // X64-NEXT: [[S:%.*]] = alloca i16, align 2
+ // CHECK-NEXT: store i8*
// CHECK-NEXT: store i32
// Capture the bounds.
diff --git a/clang/test/CodeGenSYCL/address-space-deduction.cpp b/clang/test/CodeGenSYCL/address-space-deduction.cpp
index 936b6ce3a03f0..3453d18787c26 100644
--- a/clang/test/CodeGenSYCL/address-space-deduction.cpp
+++ b/clang/test/CodeGenSYCL/address-space-deduction.cpp
@@ -1,34 +1,33 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
-
// CHECK-LABEL: @_Z4testv(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast i32* [[I]] to i32 addrspace(4)*
// CHECK-NEXT: [[PPTR:%.*]] = alloca i32 addrspace(4)*, align 8
-// CHECK-NEXT: [[PPTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[PPTR]] to i32 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[IS_I_PTR:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[IS_I_PTR_ASCAST:%.*]] = addrspacecast i8* [[IS_I_PTR]] to i8 addrspace(4)*
// CHECK-NEXT: [[VAR23:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[VAR23_ASCAST:%.*]] = addrspacecast i32* [[VAR23]] to i32 addrspace(4)*
// CHECK-NEXT: [[CP:%.*]] = alloca i8 addrspace(4)*, align 8
-// CHECK-NEXT: [[CP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CP]] to i8 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[ARR:%.*]] = alloca [42 x i32], align 4
-// CHECK-NEXT: [[ARR_ASCAST:%.*]] = addrspacecast [42 x i32]* [[ARR]] to [42 x i32] addrspace(4)*
// CHECK-NEXT: [[CPP:%.*]] = alloca i8 addrspace(4)*, align 8
-// CHECK-NEXT: [[CPP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CPP]] to i8 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[APTR:%.*]] = alloca i32 addrspace(4)*, align 8
-// CHECK-NEXT: [[APTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[APTR]] to i32 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[STR:%.*]] = alloca i8 addrspace(4)*, align 8
-// CHECK-NEXT: [[STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[STR]] to i8 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[PHI_STR:%.*]] = alloca i8 addrspace(4)*, align 8
-// CHECK-NEXT: [[PHI_STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[PHI_STR]] to i8 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[SELECT_NULL:%.*]] = alloca i8 addrspace(4)*, align 8
-// CHECK-NEXT: [[SELECT_NULL_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_NULL]] to i8 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[SELECT_STR_TRIVIAL1:%.*]] = alloca i8 addrspace(4)*, align 8
-// CHECK-NEXT: [[SELECT_STR_TRIVIAL1_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL1]] to i8 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[SELECT_STR_TRIVIAL2:%.*]] = alloca i8 addrspace(4)*, align 8
+// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast i32* [[I]] to i32 addrspace(4)*
+// CHECK-NEXT: [[PPTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[PPTR]] to i32 addrspace(4)* addrspace(4)*
+// CHECK-NEXT: [[IS_I_PTR_ASCAST:%.*]] = addrspacecast i8* [[IS_I_PTR]] to i8 addrspace(4)*
+// CHECK-NEXT: [[VAR23_ASCAST:%.*]] = addrspacecast i32* [[VAR23]] to i32 addrspace(4)*
+// CHECK-NEXT: [[CP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CP]] to i8 addrspace(4)* addrspace(4)*
+// CHECK-NEXT: [[ARR_ASCAST:%.*]] = addrspacecast [42 x i32]* [[ARR]] to [42 x i32] addrspace(4)*
+// CHECK-NEXT: [[CPP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CPP]] to i8 addrspace(4)* addrspace(4)*
+// CHECK-NEXT: [[APTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[APTR]] to i32 addrspace(4)* addrspace(4)*
+// CHECK-NEXT: [[STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[STR]] to i8 addrspace(4)* addrspace(4)*
+// CHECK-NEXT: [[PHI_STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[PHI_STR]] to i8 addrspace(4)* addrspace(4)*
+// CHECK-NEXT: [[SELECT_NULL_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_NULL]] to i8 addrspace(4)* addrspace(4)*
+// CHECK-NEXT: [[SELECT_STR_TRIVIAL1_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL1]] to i8 addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[SELECT_STR_TRIVIAL2_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL2]] to i8 addrspace(4)* addrspace(4)*
// CHECK-NEXT: store i32 0, i32 addrspace(4)* [[I_ASCAST]], align 4
// CHECK-NEXT: store i32 addrspace(4)* [[I_ASCAST]], i32 addrspace(4)* addrspace(4)* [[PPTR_ASCAST]], align 8
diff --git a/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp b/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp
index 4ed953a9ebf7b..206f3d4e4a409 100644
--- a/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp
+++ b/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp
@@ -12,7 +12,9 @@ int test_amdgcn_target_temp_alloca() {
int arr[N];
// CHECK: [[VAR_ADDR:%.+]] = alloca [100 x i32]*, align 8, addrspace(5)
+ // CHECK-NEXT: [[VAR2_ADDR:%.+]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[VAR_ADDR_CAST:%.+]] = addrspacecast [100 x i32]* addrspace(5)* [[VAR_ADDR]] to [100 x i32]**
+ // CHECK-NEXT: [[VAR2_ADDR_CAST:%.+]] = addrspacecast i32 addrspace(5)* [[VAR2_ADDR]] to i32*
// CHECK: store [100 x i32]* [[VAR:%.+]], [100 x i32]** [[VAR_ADDR_CAST]], align 8
#pragma omp target
More information about the cfe-commits
mailing list