[clang] [clang] remove addrspace cast from CreateIRTemp (PR #179327)
Jameson Nash via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 3 07:54:15 PST 2026
https://github.com/vtjnash updated https://github.com/llvm/llvm-project/pull/179327
>From 4d8c2ce6c5435962f127bcd0d138b14133db0e5e Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Mon, 2 Feb 2026 19:02:59 +0000
Subject: [PATCH] [clang] remove addrspace cast from CreateIRTemp
This adds unnecessary work to the IR (since they are only used for load
and store), which just causes some IR noise.
---
clang/lib/CodeGen/CGClass.cpp | 2 +-
clang/lib/CodeGen/CGExpr.cpp | 7 +-
clang/lib/CodeGen/CGExprScalar.cpp | 8 +-
clang/lib/CodeGen/CodeGenFunction.cpp | 2 +-
clang/lib/CodeGen/CodeGenFunction.h | 12 +-
clang/lib/CodeGen/CodeGenPGO.cpp | 2 +-
.../test/CodeGen/AMDGPU/amdgpu-atomic-float.c | 28 --
clang/test/CodeGen/AMDGPU/full-bf16.c | 2 -
clang/test/CodeGen/amdgpu-abi-version.c | 2 -
clang/test/CodeGen/builtins-extended-image.c | 88 ------
clang/test/CodeGen/builtins-image-load.c | 84 ------
clang/test/CodeGen/scoped-atomic-ops.c | 102 -------
clang/test/CodeGen/target-addrspace.cpp | 4 -
clang/test/CodeGenCUDA/amdgpu-bf16.cu | 4 -
clang/test/CodeGenCUDA/spirv-amdgcn-bf16.cu | 4 -
.../CodeGenCXX/dynamic-cast-address-space.cpp | 4 -
clang/test/CodeGenHIP/amdgpu-barrier-type.hip | 2 -
.../CodeGenHIP/builtins-make-buffer-rsrc.hip | 8 -
clang/test/CodeGenHIP/hip_weak_alias.cpp | 28 +-
clang/test/CodeGenHIP/printf.cpp | 8 -
clang/test/CodeGenHIP/printf_nonhostcall.cpp | 16 --
.../CodeGenHIP/sanitize-undefined-null.hip | 2 -
clang/test/CodeGenHIP/spirv-amdgcn-ballot.cpp | 2 -
.../CodeGenOpenCL/addr-space-struct-arg.cl | 13 +-
.../amdgpu-abi-struct-arg-byref.cl | 4 +-
clang/test/CodeGenOpenCL/atomic-ops.cl | 1 -
.../CodeGenOpenCL/builtins-amdgcn-gfx12.cl | 4 -
.../CodeGenOpenCL/builtins-amdgcn-gfx1250.cl | 14 -
.../CodeGenOpenCL/check-atomic-alignment.cl | 2 -
clang/test/CodeGenSYCL/function-attrs.cpp | 14 +-
.../__clang_hip_libdevice_declares.cpp | 18 +-
.../test/Headers/amdgcn_openmp_device_math.c | 270 +++++++-----------
.../Headers/amdgcn_openmp_device_math_c.c | 6 -
.../amdgcn_openmp_device_math_constexpr.cpp | 32 ---
clang/test/Headers/gpu_disabled_math.cpp | 2 -
clang/test/Headers/gpuintrin.c | 92 ++----
clang/test/Headers/gpuintrin_lang.c | 12 -
.../Headers/openmp-device-functions-bool.c | 12 +-
clang/test/Headers/openmp_new_nothrow.cpp | 6 -
clang/test/OpenMP/amdgcn_weak_alias.c | 30 +-
clang/test/OpenMP/amdgcn_weak_alias.cpp | 26 +-
41 files changed, 242 insertions(+), 737 deletions(-)
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 02e8912bdbbf0..81ca858d29512 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -2495,7 +2495,7 @@ void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
// FIXME: This is dumb, we should ask the ABI not to try to set the return
// value instead.
if (!RetType->isVoidType())
- ReturnValue = CreateIRTemp(RetType, "retval.inhctor");
+ ReturnValue = CreateIRTempWithoutCast(RetType, "retval.inhctor");
CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
CXXThisValue = CXXABIThisValue;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 339314ecff9cd..a2eb4d5930829 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -181,9 +181,10 @@ RawAddress CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty,
return CreateTempAlloca(Ty, Align, Name);
}
-RawAddress CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) {
+RawAddress CodeGenFunction::CreateIRTempWithoutCast(QualType Ty,
+ const Twine &Name) {
CharUnits Align = getContext().getTypeAlignInChars(Ty);
- return CreateTempAlloca(ConvertType(Ty), Align, Name);
+ return CreateTempAllocaWithoutCast(ConvertType(Ty), Align, Name, nullptr);
}
RawAddress CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
@@ -6156,7 +6157,7 @@ CodeGenFunction::EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, QualType Ty) {
OpaqueValueMappingData::bind(*this, E->getOpaqueArgLValue(), BaseLV);
QualType ExprTy = E->getType();
- Address OutTemp = CreateIRTemp(ExprTy);
+ Address OutTemp = CreateIRTempWithoutCast(ExprTy);
LValue TempLV = MakeAddrLValue(OutTemp, ExprTy);
if (E->isInOut())
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 1b1bc4a11741e..9548b25507274 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2459,8 +2459,8 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
assert(LoadList.size() >= VecTy->getNumElements() &&
"Flattened type on RHS must have the same number or more elements "
"than vector on LHS.");
- llvm::Value *V =
- CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
+ llvm::Value *V = CGF.Builder.CreateLoad(
+ CGF.CreateIRTempWithoutCast(DestTy, "flatcast.tmp"));
// write to V.
for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) {
RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
@@ -2478,8 +2478,8 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
"Flattened type on RHS must have the same number or more elements "
"than vector on LHS.");
- llvm::Value *V =
- CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
+ llvm::Value *V = CGF.Builder.CreateLoad(
+ CGF.CreateIRTempWithoutCast(DestTy, "flatcast.tmp"));
// V is an allocated temporary to build the truncated matrix into.
for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
unsigned ColMajorIndex =
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 61128316963ac..d8ee39b347697 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1244,7 +1244,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
ReturnValue = Address(Addr, ConvertType(RetTy),
CGM.getNaturalTypeAlignment(RetTy), KnownNonNull);
} else {
- ReturnValue = CreateIRTemp(RetTy, "retval");
+ ReturnValue = CreateIRTempWithoutCast(RetTy, "retval");
// Tell the epilog emitter to autorelease the result. We do this
// now so that various specialized functions can suppress it
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 226950ab599e3..044b9834b1a92 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2904,15 +2904,15 @@ class CodeGenFunction : public CodeGenTypeCache {
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty,
const Twine &Name = "tmp");
- /// CreateIRTemp - Create a temporary IR object of the given type, with
- /// appropriate alignment. This routine should only be used when an temporary
- /// value needs to be stored into an alloca (for example, to avoid explicit
- /// PHI construction), but the type is the IR type, not the type appropriate
- /// for storing in memory.
+ /// CreateIRTempWithoutCast - Create a temporary IR object of the given type,
+ /// with appropriate alignment. This routine should only be used when an
+ /// temporary value needs to be stored into an alloca (for example, to avoid
+ /// explicit PHI construction), but the type is the IR type, not the type
+ /// appropriate for storing in memory.
///
/// That is, this is exactly equivalent to CreateMemTemp, but calling
/// ConvertType instead of ConvertTypeForMem.
- RawAddress CreateIRTemp(QualType T, const Twine &Name = "tmp");
+ RawAddress CreateIRTempWithoutCast(QualType T, const Twine &Name = "tmp");
/// CreateMemTemp - Create a temporary memory object of the given type, with
/// appropriate alignmen and cast it to the default address space. Returns
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 4921eba7934a2..59faa3aef2460 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1505,7 +1505,7 @@ void CodeGenFunction::maybeCreateMCDCCondBitmap() {
// Note: This doesn't initialize Addrs in invalidated Decisions.
for (auto *MCDCCondBitmapAddr : PGO->getMCDCCondBitmapAddrArray(Builder))
*MCDCCondBitmapAddr =
- CreateIRTemp(getContext().UnsignedIntTy, "mcdc.addr");
+ CreateIRTempWithoutCast(getContext().UnsignedIntTy, "mcdc.addr");
}
}
bool CodeGenFunction::isMCDCDecisionExpr(const Expr *E) const {
diff --git a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
index d74470304c69e..1b77ead54d2b2 100644
--- a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
+++ b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
@@ -5,16 +5,12 @@
// SAFE-LABEL: define dso_local float @test_float_post_inc(
// SAFE-SAME: ) #[[ATTR0:[0-9]+]] {
// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory [[META3]]
// SAFE-NEXT: ret float [[TMP0]]
//
// UNSAFE-LABEL: define dso_local float @test_float_post_inc(
// UNSAFE-SAME: ) #[[ATTR0:[0-9]+]] {
// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
// UNSAFE-NEXT: ret float [[TMP0]]
//
@@ -27,8 +23,6 @@ float test_float_post_inc()
// CHECK-LABEL: define dso_local float @test_float_post_dc(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: ret float [[TMP0]]
//
@@ -41,8 +35,6 @@ float test_float_post_dc()
// CHECK-LABEL: define dso_local float @test_float_pre_dc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
// CHECK-NEXT: ret float [[TMP1]]
@@ -56,8 +48,6 @@ float test_float_pre_dc()
// SAFE-LABEL: define dso_local float @test_float_pre_inc(
// SAFE-SAME: ) #[[ATTR0]] {
// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
// SAFE-NEXT: ret float [[TMP1]]
@@ -65,8 +55,6 @@ float test_float_pre_dc()
// UNSAFE-LABEL: define dso_local float @test_float_pre_inc(
// UNSAFE-SAME: ) #[[ATTR0]] {
// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
// UNSAFE-NEXT: ret float [[TMP1]]
@@ -80,8 +68,6 @@ float test_float_pre_inc()
// CHECK-LABEL: define dso_local double @test_double_post_inc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: ret double [[TMP0]]
//
@@ -94,8 +80,6 @@ double test_double_post_inc()
// CHECK-LABEL: define dso_local double @test_double_post_dc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: ret double [[TMP0]]
//
@@ -108,8 +92,6 @@ double test_double_post_dc()
// CHECK-LABEL: define dso_local double @test_double_pre_dc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
// CHECK-NEXT: ret double [[TMP1]]
@@ -123,8 +105,6 @@ double test_double_pre_dc()
// CHECK-LABEL: define dso_local double @test_double_pre_inc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
// CHECK-NEXT: ret double [[TMP1]]
@@ -138,8 +118,6 @@ double test_double_pre_inc()
// CHECK-LABEL: define dso_local half @test__Float16_post_inc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: ret half [[TMP0]]
//
@@ -152,8 +130,6 @@ _Float16 test__Float16_post_inc()
// CHECK-LABEL: define dso_local half @test__Float16_post_dc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: ret half [[TMP0]]
//
@@ -166,8 +142,6 @@ _Float16 test__Float16_post_dc()
// CHECK-LABEL: define dso_local half @test__Float16_pre_dc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: [[TMP1:%.*]] = fsub half [[TMP0]], 0xH3C00
// CHECK-NEXT: ret half [[TMP1]]
@@ -181,8 +155,6 @@ _Float16 test__Float16_pre_dc()
// CHECK-LABEL: define dso_local half @test__Float16_pre_inc(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// CHECK-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], 0xH3C00
// CHECK-NEXT: ret half [[TMP1]]
diff --git a/clang/test/CodeGen/AMDGPU/full-bf16.c b/clang/test/CodeGen/AMDGPU/full-bf16.c
index d2ec34561cd8e..3ce1027b95805 100644
--- a/clang/test/CodeGen/AMDGPU/full-bf16.c
+++ b/clang/test/CodeGen/AMDGPU/full-bf16.c
@@ -10,10 +10,8 @@
// CHECK-LABEL: define dso_local bfloat @div(
// CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR_ASCAST]], align 2
diff --git a/clang/test/CodeGen/amdgpu-abi-version.c b/clang/test/CodeGen/amdgpu-abi-version.c
index b9c1de0521b95..cc6223da76554 100644
--- a/clang/test/CodeGen/amdgpu-abi-version.c
+++ b/clang/test/CodeGen/amdgpu-abi-version.c
@@ -7,8 +7,6 @@
// CHECK-LABEL: define dso_local i32 @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
// CHECK-NEXT: [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
// CHECK-NEXT: [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
diff --git a/clang/test/CodeGen/builtins-extended-image.c b/clang/test/CodeGen/builtins-extended-image.c
index 0dbf81dabd77b..491bbcf7d5412 100644
--- a/clang/test/CodeGen/builtins-extended-image.c
+++ b/clang/test/CodeGen/builtins-extended-image.c
@@ -8,13 +8,11 @@ typedef _Float16 half4 __attribute__((ext_vector_type(4)));
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -41,13 +39,11 @@ float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -74,13 +70,11 @@ float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -107,13 +101,11 @@ float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -140,13 +132,11 @@ float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -172,13 +162,11 @@ float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -205,13 +193,11 @@ float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -239,13 +225,11 @@ float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -272,13 +256,11 @@ float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -306,13 +288,11 @@ float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -342,13 +322,11 @@ float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_3d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -376,13 +354,11 @@ float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_3d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -411,13 +387,11 @@ float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_3d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -451,13 +425,11 @@ float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_cube_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -485,13 +457,11 @@ float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_cube_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -520,13 +490,11 @@ float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i3
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1darray_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -553,13 +521,11 @@ float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, in
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1darray_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -587,13 +553,11 @@ float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1darray_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -622,13 +586,11 @@ float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2darray_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -656,13 +618,11 @@ float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, in
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2darray_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -691,13 +651,11 @@ float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2darray_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -729,13 +687,11 @@ float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -761,13 +717,11 @@ half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -794,13 +748,11 @@ half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -828,13 +780,11 @@ half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -861,13 +811,11 @@ half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -895,13 +843,11 @@ half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -932,13 +878,11 @@ half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_3d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -966,13 +910,11 @@ half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_3d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1001,13 +943,11 @@ half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_3d_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1041,13 +981,11 @@ half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_cube_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1075,13 +1013,11 @@ half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i3
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_cube_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1110,13 +1046,11 @@ half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1darray_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1143,13 +1077,11 @@ half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1darray_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1177,13 +1109,11 @@ half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1darray_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1212,13 +1142,11 @@ half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2darray_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1246,13 +1174,11 @@ half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2darray_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1281,13 +1207,11 @@ half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2darray_v4f16_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1319,13 +1243,11 @@ half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int
// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2d_f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1352,13 +1274,11 @@ float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, _
// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2d_f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1386,13 +1306,11 @@ float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __
// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2d_f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1423,13 +1341,11 @@ float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __
// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2darray_f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1457,13 +1373,11 @@ float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i
// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2darray_f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1492,13 +1406,11 @@ float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i3
// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2darray_f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index 8442124416338..d2337eab7dbc8 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -9,11 +9,9 @@ typedef half half4 __attribute__((ext_vector_type(4)));
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -35,11 +33,9 @@ float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) {
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -61,11 +57,9 @@ float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t te
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -87,11 +81,9 @@ half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex)
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -114,11 +106,9 @@ float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -141,11 +131,9 @@ float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -168,11 +156,9 @@ half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -193,11 +179,9 @@ float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t te
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -218,11 +202,9 @@ half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex)
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -244,11 +226,9 @@ float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -270,11 +250,9 @@ half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -297,11 +275,9 @@ float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t te
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -324,11 +300,9 @@ half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex)
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -351,11 +325,9 @@ float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -378,11 +350,9 @@ half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t te
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -404,11 +374,9 @@ float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -430,11 +398,9 @@ half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -457,11 +423,9 @@ float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_tex
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -484,11 +448,9 @@ half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_textu
// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -511,11 +473,9 @@ float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex)
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -538,11 +498,9 @@ float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -565,11 +523,9 @@ half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -593,11 +549,9 @@ float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -621,11 +575,9 @@ float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_tex
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -649,11 +601,9 @@ half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_textu
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -677,11 +627,9 @@ float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -705,11 +653,9 @@ half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -733,11 +679,9 @@ float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_textur
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -761,13 +705,11 @@ half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -792,13 +734,11 @@ float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, floa
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -823,12 +763,10 @@ half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32(
// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -852,13 +790,11 @@ float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, __
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -884,12 +820,10 @@ half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, f
// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32(
// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -913,13 +847,11 @@ float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, __amdgpu_t
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -945,13 +877,11 @@ float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, floa
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -977,12 +907,10 @@ half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float
// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32(
// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
@@ -1007,13 +935,11 @@ float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, __amd
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1040,13 +966,11 @@ float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32,
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1073,13 +997,11 @@ half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, f
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1106,13 +1028,11 @@ float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, floa
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1139,13 +1059,11 @@ half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1172,13 +1090,11 @@ float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, fl
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index 1385081578d2e..3fbaf75cf98e6 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -9,10 +9,8 @@
// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi1a(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
@@ -40,10 +38,8 @@
// AMDGCN_CL_20-LABEL: define hidden i32 @fi1a(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
@@ -108,7 +104,6 @@ int fi1a(int *i) {
// AMDGCN-LABEL: define hidden i32 @fi1b(
// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5)
@@ -116,7 +111,6 @@ int fi1a(int *i) {
// AMDGCN-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// AMDGCN-NEXT: [[ATOMIC_TEMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP1]] to ptr
@@ -2591,12 +2585,10 @@ void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4a(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr
@@ -2624,12 +2616,10 @@ void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4a(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
@@ -2691,12 +2681,10 @@ _Bool fi4a(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4b(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr
@@ -2724,12 +2712,10 @@ _Bool fi4a(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4b(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
@@ -2791,12 +2777,10 @@ _Bool fi4b(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4c(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr
@@ -2824,12 +2808,10 @@ _Bool fi4b(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4c(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
@@ -2891,12 +2873,10 @@ _Bool fi4c(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4_clustr(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr
@@ -2924,12 +2904,10 @@ _Bool fi4c(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4_clustr(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
@@ -2991,12 +2969,10 @@ _Bool fi4_clustr(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4d(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr
@@ -3024,12 +3000,10 @@ _Bool fi4_clustr(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4d(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
@@ -3091,12 +3065,10 @@ _Bool fi4d(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4e(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr
@@ -3124,12 +3096,10 @@ _Bool fi4d(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4e(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
@@ -3191,12 +3161,10 @@ _Bool fi4e(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5a(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
@@ -3224,12 +3192,10 @@ _Bool fi4e(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5a(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
@@ -3290,12 +3256,10 @@ _Bool fi5a(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5b(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
@@ -3323,12 +3287,10 @@ _Bool fi5a(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5b(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
@@ -3389,12 +3351,10 @@ _Bool fi5b(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5c(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
@@ -3422,12 +3382,10 @@ _Bool fi5b(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5c(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
@@ -3487,12 +3445,10 @@ _Bool fi5c(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5_clustr(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
@@ -3520,12 +3476,10 @@ _Bool fi5c(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5_clustr(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
@@ -3585,12 +3539,10 @@ _Bool fi5_clustr(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5d(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
@@ -3618,12 +3570,10 @@ _Bool fi5_clustr(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5d(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
@@ -3683,12 +3633,10 @@ _Bool fi5d(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5e(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
@@ -3716,12 +3664,10 @@ _Bool fi5d(int *i) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5e(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
@@ -3781,11 +3727,9 @@ _Bool fi5e(int *i) {
// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6a(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
@@ -3802,11 +3746,9 @@ _Bool fi5e(int *i) {
// AMDGCN_CL_20-LABEL: define hidden i32 @fi6a(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
@@ -3845,11 +3787,9 @@ int fi6a(int *c, int *d) {
// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6b(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
@@ -3866,11 +3806,9 @@ int fi6a(int *c, int *d) {
// AMDGCN_CL_20-LABEL: define hidden i32 @fi6b(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
@@ -3909,11 +3847,9 @@ int fi6b(int *c, int *d) {
// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6c(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
@@ -3930,11 +3866,9 @@ int fi6b(int *c, int *d) {
// AMDGCN_CL_20-LABEL: define hidden i32 @fi6c(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
@@ -3973,11 +3907,9 @@ int fi6c(int *c, int *d) {
// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6_clustr(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
@@ -3994,11 +3926,9 @@ int fi6c(int *c, int *d) {
// AMDGCN_CL_20-LABEL: define hidden i32 @fi6_clustr(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
@@ -4037,11 +3967,9 @@ int fi6_clustr(int *c, int *d) {
// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6d(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
@@ -4058,11 +3986,9 @@ int fi6_clustr(int *c, int *d) {
// AMDGCN_CL_20-LABEL: define hidden i32 @fi6d(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
@@ -4101,11 +4027,9 @@ int fi6d(int *c, int *d) {
// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6e(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
@@ -4122,11 +4046,9 @@ int fi6d(int *c, int *d) {
// AMDGCN_CL_20-LABEL: define hidden i32 @fi6e(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
@@ -4165,11 +4087,9 @@ int fi6e(int *c, int *d) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7a(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4186,11 +4106,9 @@ int fi6e(int *c, int *d) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7a(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4228,11 +4146,9 @@ _Bool fi7a(_Bool *c) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7b(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4249,11 +4165,9 @@ _Bool fi7a(_Bool *c) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7b(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4291,11 +4205,9 @@ _Bool fi7b(_Bool *c) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7c(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4312,11 +4224,9 @@ _Bool fi7b(_Bool *c) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7c(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4354,11 +4264,9 @@ _Bool fi7c(_Bool *c) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7_clustr(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4375,11 +4283,9 @@ _Bool fi7c(_Bool *c) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7_clustr(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4417,11 +4323,9 @@ _Bool fi7_clustr(_Bool *c) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7d(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4438,11 +4342,9 @@ _Bool fi7_clustr(_Bool *c) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7d(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4480,11 +4382,9 @@ _Bool fi7d(_Bool *c) {
// AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7e(
// AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
@@ -4501,11 +4401,9 @@ _Bool fi7d(_Bool *c) {
// AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7e(
// AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
-// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5)
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
diff --git a/clang/test/CodeGen/target-addrspace.cpp b/clang/test/CodeGen/target-addrspace.cpp
index 9adf53611bc24..c8e65bd9e6796 100644
--- a/clang/test/CodeGen/target-addrspace.cpp
+++ b/clang/test/CodeGen/target-addrspace.cpp
@@ -53,9 +53,7 @@ void p1(void [[clang::address_space(0)]] * p) { f(p); }
// AMDGPU-LABEL: define hidden noundef ptr @_Z2p2PU3AS3v(
// AMDGPU-SAME: ptr addrspace(3) noundef [[P:%.*]]) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGPU-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// AMDGPU-NEXT: store ptr addrspace(3) [[P]], ptr [[P_ADDR_ASCAST]], align 4
// AMDGPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[P_ADDR_ASCAST]], align 4
@@ -75,9 +73,7 @@ void *p2(void [[clang::address_space(3)]] * p) { return p; }
// AMDGPU-LABEL: define hidden noundef ptr @_Z2p3PU3AS3v(
// AMDGPU-SAME: ptr addrspace(3) noundef [[P:%.*]]) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGPU-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// AMDGPU-NEXT: store ptr addrspace(3) [[P]], ptr [[P_ADDR_ASCAST]], align 4
// AMDGPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[P_ADDR_ASCAST]], align 4
diff --git a/clang/test/CodeGenCUDA/amdgpu-bf16.cu b/clang/test/CodeGenCUDA/amdgpu-bf16.cu
index f9b067d3fe0d3..d146eb6dd9516 100644
--- a/clang/test/CodeGenCUDA/amdgpu-bf16.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-bf16.cu
@@ -54,9 +54,7 @@ __device__ void test_load(__bf16 *out, __bf16 *in) {
// CHECK-LABEL: @_Z8test_retDF16b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2
@@ -68,9 +66,7 @@ __device__ __bf16 test_ret( __bf16 in) {
// CHECK-LABEL: @_Z9test_callDF16b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2
diff --git a/clang/test/CodeGenCUDA/spirv-amdgcn-bf16.cu b/clang/test/CodeGenCUDA/spirv-amdgcn-bf16.cu
index 2a0f84d1daa75..caa5ed1783494 100644
--- a/clang/test/CodeGenCUDA/spirv-amdgcn-bf16.cu
+++ b/clang/test/CodeGenCUDA/spirv-amdgcn-bf16.cu
@@ -54,9 +54,7 @@ __device__ void test_load(__bf16 *out, __bf16 *in) {
// CHECK-LABEL: @_Z8test_retDF16b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
@@ -68,9 +66,7 @@ __device__ __bf16 test_ret( __bf16 in) {
// CHECK-LABEL: @_Z9test_callDF16b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 691a74f000adb..f81fc84691bf6 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -24,11 +24,9 @@ B fail;
// CHECK-LABEL: define dso_local noundef nonnull align 8 dereferenceable(8) ptr @_Z1fP1A(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] personality ptr @__gxx_personality_v0 {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[EXN_SLOT:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
@@ -61,11 +59,9 @@ B fail;
// WITH-NONZERO-DEFAULT-AS-LABEL: define spir_func noundef align 8 dereferenceable(8) ptr addrspace(4) @_Z1fP1A(
// WITH-NONZERO-DEFAULT-AS-SAME: ptr addrspace(4) noundef [[A:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] personality ptr addrspace(4) @__gxx_personality_v0 {
// WITH-NONZERO-DEFAULT-AS-NEXT: entry:
-// WITH-NONZERO-DEFAULT-AS-NEXT: [[RETVAL:%.*]] = alloca ptr addrspace(4), align 8
// WITH-NONZERO-DEFAULT-AS-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// WITH-NONZERO-DEFAULT-AS-NEXT: [[EXN_SLOT:%.*]] = alloca ptr addrspace(4), align 8
// WITH-NONZERO-DEFAULT-AS-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
-// WITH-NONZERO-DEFAULT-AS-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// WITH-NONZERO-DEFAULT-AS-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4)
// WITH-NONZERO-DEFAULT-AS-NEXT: store ptr addrspace(4) [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
// WITH-NONZERO-DEFAULT-AS-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
diff --git a/clang/test/CodeGenHIP/amdgpu-barrier-type.hip b/clang/test/CodeGenHIP/amdgpu-barrier-type.hip
index 229e8b3c737c6..383fba9c2493f 100644
--- a/clang/test/CodeGenHIP/amdgpu-barrier-type.hip
+++ b/clang/test/CodeGenHIP/amdgpu-barrier-type.hip
@@ -17,9 +17,7 @@ void useBar(__amdgpu_named_workgroup_barrier_t *);
// CHECK-LABEL: define {{[^@]+}}@_Z7testSemPu34__amdgpu_named_workgroup_barrier_t
// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
diff --git a/clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip b/clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip
index e92105091712c..83d5f44b3aeb8 100644
--- a/clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip
+++ b/clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip
@@ -7,12 +7,10 @@
// CHECK-LABEL: define dso_local ptr addrspace(8) @_Z31test_amdgcn_make_buffer_rsrc_p0Pvsii(
// CHECK-SAME: ptr noundef [[P:%.*]], i16 noundef signext [[STRIDE:%.*]], i32 noundef [[NUM:%.*]], i32 noundef [[FLAGS:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[STRIDE_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
// CHECK-NEXT: [[NUM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[FLAGS_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// CHECK-NEXT: [[STRIDE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRIDE_ADDR]] to ptr
// CHECK-NEXT: [[NUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NUM_ADDR]] to ptr
@@ -36,11 +34,9 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short
// CHECK-LABEL: define dso_local ptr addrspace(8) @_Z47test_amdgcn_make_buffer_rsrc_p0_stride_constantPvii(
// CHECK-SAME: ptr noundef [[P:%.*]], i32 noundef [[NUM:%.*]], i32 noundef [[FLAGS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[NUM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[FLAGS_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// CHECK-NEXT: [[NUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NUM_ADDR]] to ptr
// CHECK-NEXT: [[FLAGS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS_ADDR]] to ptr
@@ -61,11 +57,9 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constan
// CHECK-LABEL: define dso_local ptr addrspace(8) @_Z44test_amdgcn_make_buffer_rsrc_p0_num_constantPvsi(
// CHECK-SAME: ptr noundef [[P:%.*]], i16 noundef signext [[STRIDE:%.*]], i32 noundef [[FLAGS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[STRIDE_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
// CHECK-NEXT: [[FLAGS_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// CHECK-NEXT: [[STRIDE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRIDE_ADDR]] to ptr
// CHECK-NEXT: [[FLAGS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS_ADDR]] to ptr
@@ -85,11 +79,9 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(v
// CHECK-LABEL: define dso_local ptr addrspace(8) @_Z46test_amdgcn_make_buffer_rsrc_p0_flags_constantPvsi(
// CHECK-SAME: ptr noundef [[P:%.*]], i16 noundef signext [[STRIDE:%.*]], i32 noundef [[NUM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[STRIDE_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
// CHECK-NEXT: [[NUM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// CHECK-NEXT: [[STRIDE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRIDE_ADDR]] to ptr
// CHECK-NEXT: [[NUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NUM_ADDR]] to ptr
diff --git a/clang/test/CodeGenHIP/hip_weak_alias.cpp b/clang/test/CodeGenHIP/hip_weak_alias.cpp
index 3cc9a2bd09fc0..3484d7f550056 100644
--- a/clang/test/CodeGenHIP/hip_weak_alias.cpp
+++ b/clang/test/CodeGenHIP/hip_weak_alias.cpp
@@ -9,6 +9,9 @@
extern "C" {
//.
+// HOST: @__HostVar = global i32 1, align 4
+// HOST: @__hip_cuid_ = global i8 0
+// HOST: @llvm.compiler.used = appending global [1 x ptr] [ptr @__hip_cuid_], section "llvm.metadata"
// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc
// HOST: @HostFunc_ = alias i32 (), ptr @__HostFunc
// HOST: @HostVar = weak alias i32, ptr @__HostVar
@@ -20,6 +23,8 @@ extern "C" {
// HOST: @_Z4Fourv = weak alias i32 (), ptr @_Z6__Fourv
// HOST: @_Z4Fourf = weak alias float (float), ptr @_Z6__Fourf
//.
+// DEVICE: @__hip_cuid_ = addrspace(1) global i8 0
+// DEVICE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
// DEVICE: @One = weak alias i32 (), ptr @__One
// DEVICE: @One_ = alias i32 (), ptr @__One
// DEVICE: @Two = weak alias i32 (), ptr @__Two
@@ -44,8 +49,6 @@ extern int __attribute__((alias("__HostVar"))) HostVar_;
// DEVICE-LABEL: define dso_local i32 @__One(
// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: ret i32 1
//
__device__ int __One(void) { return 1; }
@@ -60,8 +63,6 @@ __device__ int One_(void) __attribute__((alias("__One")));
// DEVICE-LABEL: define dso_local i32 @__Two(
// DEVICE-SAME: ) #[[ATTR0]] {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: ret i32 2
//
__host__ __device__ int __Two(void) { return 2; }
@@ -77,8 +78,6 @@ __host__ __device__ int Two_(void) __attribute__((alias("__Two")));
// DEVICE-LABEL: define linkonce_odr noundef i32 @_Z7__Threev(
// DEVICE-SAME: ) #[[ATTR0]] comdat {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: ret i32 5
//
__host__ __device__ constexpr int __Three(void) { return 5; }
@@ -94,8 +93,6 @@ __host__ __device__ int Three_(void) __attribute__((alias("_Z7__Threev")));
// DEVICE-LABEL: define dso_local noundef i32 @_Z6__Fourv(
// DEVICE-SAME: ) #[[ATTR0]] {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: ret i32 2
//
__host__ __device__ int __Four(void) { return 2; }
@@ -111,9 +108,7 @@ __host__ __device__ int __Four(void) { return 2; }
// DEVICE-LABEL: define dso_local noundef float @_Z6__Fourf(
// DEVICE-SAME: float noundef [[F:%.*]]) #[[ATTR0]] {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// DEVICE-NEXT: [[F_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
// DEVICE-NEXT: store float [[F]], ptr [[F_ADDR_ASCAST]], align 4
// DEVICE-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR_ASCAST]], align 4
@@ -123,3 +118,16 @@ __host__ __device__ int __Four(void) { return 2; }
__host__ __device__ float __Four(float f) { return 2.0f * f; }
__host__ __device__ int Four(void) __attribute__((weak, alias("_Z6__Fourv")));
__host__ __device__ float Four(float f) __attribute__((weak, alias("_Z6__Fourf")));
+//.
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+// DEVICE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// DEVICE: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/CodeGenHIP/printf.cpp b/clang/test/CodeGenHIP/printf.cpp
index 2d814d6fdc704..2dc08aa1e5dd9 100644
--- a/clang/test/CodeGenHIP/printf.cpp
+++ b/clang/test/CodeGenHIP/printf.cpp
@@ -12,9 +12,7 @@ extern "C" __device__ int printf(const char *format, ...);
// AMDGCN-LABEL: define dso_local noundef i32 @_Z4foo1v(
// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*]]:
-// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
-// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
// AMDGCN-NEXT: store ptr addrspacecast (ptr addrspace(4) @.str to ptr), ptr [[S_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
@@ -65,9 +63,7 @@ extern "C" __device__ int printf(const char *format, ...);
// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @_Z4foo1v(
// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
-// AMDGCNSPIRV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// AMDGCNSPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8
-// AMDGCNSPIRV-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str to ptr addrspace(4)), ptr addrspace(4) [[S_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ASCAST]], align 8
@@ -125,8 +121,6 @@ __device__ char *dstr;
// AMDGCN-LABEL: define dso_local noundef i32 @_Z4foo2v(
// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*]]:
-// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspacecast (ptr addrspace(1) @dstr to ptr), align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspacecast (ptr addrspace(1) @dstr to ptr), align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = call i64 @__ockl_printf_begin(i64 0)
@@ -171,8 +165,6 @@ __device__ char *dstr;
// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @_Z4foo2v(
// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]:
-// AMDGCNSPIRV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
-// AMDGCNSPIRV-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) addrspacecast (ptr addrspace(1) @dstr to ptr addrspace(4)), align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) addrspacecast (ptr addrspace(1) @dstr to ptr addrspace(4)), align 8
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i64 @__ockl_printf_begin(i64 0)
diff --git a/clang/test/CodeGenHIP/printf_nonhostcall.cpp b/clang/test/CodeGenHIP/printf_nonhostcall.cpp
index 1982eb864e2a4..a05b8166eda8a 100644
--- a/clang/test/CodeGenHIP/printf_nonhostcall.cpp
+++ b/clang/test/CodeGenHIP/printf_nonhostcall.cpp
@@ -13,9 +13,7 @@ extern "C" __device__ int printf(const char *format, ...);
// CHECK-LABEL: define dso_local noundef i32 @_Z4foo1v
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
// CHECK-NEXT: store ptr addrspacecast (ptr addrspace(4) @.str to ptr), ptr [[S_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
@@ -70,9 +68,7 @@ extern "C" __device__ int printf(const char *format, ...);
//
// CHECK_CONSTRAINED-LABEL: define dso_local noundef i32 @_Z4foo1v
// CHECK_CONSTRAINED-NEXT: entry:
-// CHECK_CONSTRAINED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK_CONSTRAINED-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK_CONSTRAINED-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK_CONSTRAINED-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
// CHECK_CONSTRAINED-NEXT: store ptr addrspacecast (ptr addrspace(4) @.str to ptr), ptr [[S_ASCAST]], align 8
// CHECK_CONSTRAINED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
@@ -134,9 +130,7 @@ __device__ char *dstr;
__device__ const
// CHECK-LABEL: define dso_local noundef i32 @_Z4foo2v
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[LCVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[LCVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LCVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspacecast (ptr addrspace(1) @dstr to ptr), align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspacecast (ptr addrspace(1) @dstr to ptr), align 8
@@ -186,9 +180,7 @@ __device__ const
//
// CHECK_CONSTRAINED-LABEL: define dso_local noundef i32 @_Z4foo2v
// CHECK_CONSTRAINED-NEXT: entry:
-// CHECK_CONSTRAINED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK_CONSTRAINED-NEXT: [[LCVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK_CONSTRAINED-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK_CONSTRAINED-NEXT: [[LCVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LCVAL]] to ptr
// CHECK_CONSTRAINED-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspacecast (ptr addrspace(1) @dstr to ptr), align 8
// CHECK_CONSTRAINED-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspacecast (ptr addrspace(1) @dstr to ptr), align 8
@@ -255,8 +247,6 @@ __device__ _BitInt(128) Int128 = 45637;
// CHECK-LABEL: define dso_local noundef i32 @_Z4foo3v
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: store i32 25, ptr addrspacecast (ptr addrspace(3) @_ZZ4foo3vE1s to ptr), align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ4foo3vE1s to ptr), align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr addrspacecast (ptr addrspace(1) @g to ptr), align 2
@@ -316,8 +306,6 @@ __device__ _BitInt(128) Int128 = 45637;
//
// CHECK_CONSTRAINED-LABEL: define dso_local noundef i32 @_Z4foo3v
// CHECK_CONSTRAINED-NEXT: entry:
-// CHECK_CONSTRAINED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK_CONSTRAINED-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK_CONSTRAINED-NEXT: store i32 25, ptr addrspacecast (ptr addrspace(3) @_ZZ4foo3vE1s to ptr), align 4
// CHECK_CONSTRAINED-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ4foo3vE1s to ptr), align 4
// CHECK_CONSTRAINED-NEXT: [[TMP1:%.*]] = load i16, ptr addrspacecast (ptr addrspace(1) @g to ptr), align 2
@@ -384,9 +372,7 @@ __device__ int foo3() {
//A non trivial case,
// CHECK-LABEL: define dso_local noundef i32 @_Z4foo4v
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
// CHECK-NEXT: store ptr addrspacecast (ptr addrspace(4) @.str.4 to ptr), ptr [[S_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
@@ -429,9 +415,7 @@ __device__ int foo3() {
//
// CHECK_CONSTRAINED-LABEL: define dso_local noundef i32 @_Z4foo4v
// CHECK_CONSTRAINED-NEXT: entry:
-// CHECK_CONSTRAINED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK_CONSTRAINED-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK_CONSTRAINED-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK_CONSTRAINED-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
// CHECK_CONSTRAINED-NEXT: store ptr addrspacecast (ptr addrspace(4) @.str.4 to ptr), ptr [[S_ASCAST]], align 8
// CHECK_CONSTRAINED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
diff --git a/clang/test/CodeGenHIP/sanitize-undefined-null.hip b/clang/test/CodeGenHIP/sanitize-undefined-null.hip
index 9418b125f9955..229507129d0ff 100644
--- a/clang/test/CodeGenHIP/sanitize-undefined-null.hip
+++ b/clang/test/CodeGenHIP/sanitize-undefined-null.hip
@@ -14,9 +14,7 @@
//.
// CHECK-LABEL: @_Z3fooPc(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// CHECK-NEXT: store ptr [[P:%.*]], ptr [[P_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
diff --git a/clang/test/CodeGenHIP/spirv-amdgcn-ballot.cpp b/clang/test/CodeGenHIP/spirv-amdgcn-ballot.cpp
index 8226a109d8b8d..a1a351b578fc0 100644
--- a/clang/test/CodeGenHIP/spirv-amdgcn-ballot.cpp
+++ b/clang/test/CodeGenHIP/spirv-amdgcn-ballot.cpp
@@ -12,9 +12,7 @@
// CHECK-LABEL: define spir_func noundef i64 @_Z3fooi(
// CHECK-SAME: i32 noundef [[P:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr [[P_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: store i32 [[P]], ptr addrspace(4) [[P_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[P_ADDR_ASCAST]], align 4
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 85157bdcf43f9..758b8a0d4a946 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -645,12 +645,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
-// AMDGCN20-NEXT: [[RETVAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[RETVAL_ASCAST]] to ptr addrspace(5)
// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
-// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
//
@@ -1590,6 +1588,15 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
// AMDGCN30-NEXT: ret void
//
//
+// AMDGCN30-LABEL: define dso_local void @test_indirect_arg_globl(
+// AMDGCN30-SAME: ) #[[ATTR0]] {
+// AMDGCN30-NEXT: [[ENTRY:.*:]]
+// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
+// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
+// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
+// AMDGCN30-NEXT: ret void
+//
+//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
// AMDGCN30-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
index bc65788c17352..1cf1fcf1c9f9f 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
@@ -119,12 +119,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
-// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
-// AMDGCN-NEXT: [[RETVAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[RETVAL_ASCAST]] to ptr addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
-// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
//
diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl
index 214b3a4314222..f54880f88eb9a 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -344,7 +344,6 @@ int test_volatile(volatile atomic_int *i) {
// CHECK-LABEL: @test_volatile
// CHECK: %[[i_addr:.*]] = alloca ptr
// CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
- // CHECK-NEXT: %[[retval_ascast:.*]] = addrspacecast ptr addrspace(5) {{.*}} to ptr
// CHECK-NEXT: %[[i_addr_ascast:.*]] = addrspacecast ptr addrspace(5) %[[i_addr]] to ptr
// CHECK-NEXT: %[[atomicdst_ascast:.*]] = addrspacecast ptr addrspace(5) %[[atomicdst]] to ptr
// CHECK-NEXT: store ptr %i, ptr %[[i_addr_ascast]]
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index 8c02616780182..14d7e7a365989 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -185,10 +185,8 @@ void test_s_barrier_leave()
// CHECK-LABEL: @test_s_get_barrier_state(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
@@ -205,10 +203,8 @@ unsigned test_s_get_barrier_state(int a)
// CHECK-LABEL: @test_s_get_named_barrier_state(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index b32bcdd408512..0b4cdd0c2c28f 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -1506,10 +1506,8 @@ void test_s_wakeup_barrier(void *bar)
// CHECK-LABEL: @test_global_add_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
@@ -1525,10 +1523,8 @@ float test_global_add_f32(global float *addr, float x) {
// CHECK-LABEL: @test_global_add_half2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
@@ -1544,10 +1540,8 @@ half2 test_global_add_half2(global half2 *addr, half2 x) {
// CHECK-LABEL: @test_flat_add_2f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
@@ -1563,10 +1557,8 @@ half2 test_flat_add_2f16(generic half2 *addr, half2 x) {
// CHECK-LABEL: @test_flat_add_2bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
@@ -1584,10 +1576,8 @@ short2 test_flat_add_2bf16(generic short2 *addr, short2 x) {
// CHECK-LABEL: @test_global_add_2bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
@@ -1605,10 +1595,8 @@ short2 test_global_add_2bf16(global short2 *addr, short2 x) {
// CHECK-LABEL: @test_local_add_2f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(3) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 4
@@ -1626,10 +1614,8 @@ short2 test_local_add_2f16(local short2 *addr, short2 x) {
// CHECK-LABEL: @test_local_add_2bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(3) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 4
diff --git a/clang/test/CodeGenOpenCL/check-atomic-alignment.cl b/clang/test/CodeGenOpenCL/check-atomic-alignment.cl
index 89771d20ad090..1f7e4ce78ea55 100644
--- a/clang/test/CodeGenOpenCL/check-atomic-alignment.cl
+++ b/clang/test/CodeGenOpenCL/check-atomic-alignment.cl
@@ -24,10 +24,8 @@ struct __half2 {
// CHECK-LABEL: define dso_local <2 x half> @test_flat_add_2f16(
// CHECK-SAME: ptr noundef [[ADDR:%.*]], <2 x half> noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR_ASCAST]], align 8
diff --git a/clang/test/CodeGenSYCL/function-attrs.cpp b/clang/test/CodeGenSYCL/function-attrs.cpp
index 81f893644bc7c..5f3de41aa6584 100644
--- a/clang/test/CodeGenSYCL/function-attrs.cpp
+++ b/clang/test/CodeGenSYCL/function-attrs.cpp
@@ -5,11 +5,11 @@
int foo();
// CHECK-LABEL: define dso_local spir_func void @_Z3barv(
-// CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
-// CHECK-NEXT: [[CALL:%.*]] = call spir_func noundef i32 @_Z3foov() #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: [[CALL:%.*]] = call spir_func noundef i32 @_Z3foov() #[[ATTR1:[0-9]+]]
// CHECK-NEXT: store i32 [[CALL]], ptr addrspace(4) [[A_ASCAST]], align 4
// CHECK-NEXT: ret void
//
@@ -18,10 +18,8 @@ void bar() {
}
// CHECK-LABEL: define dso_local spir_func noundef i32 @_Z3foov(
-// CHECK-SAME: ) #[[ATTR2]] {
+// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// CHECK-NEXT: ret i32 1
//
int foo() {
@@ -38,8 +36,8 @@ int main() {
return 0;
}
//.
-// CHECK: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CHECK: attributes #1 = { convergent nounwind }
+// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #[[ATTR1]] = { convergent nounwind }
//.
-// CHECK: !{{[0-9]+}} = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
//.
diff --git a/clang/test/Headers/__clang_hip_libdevice_declares.cpp b/clang/test/Headers/__clang_hip_libdevice_declares.cpp
index 2b96f9499eb9a..87453797ab127 100644
--- a/clang/test/Headers/__clang_hip_libdevice_declares.cpp
+++ b/clang/test/Headers/__clang_hip_libdevice_declares.cpp
@@ -71,9 +71,7 @@ void attribute_check_hack(void) {
// CHECK-LABEL: define internal float @_ZL18test_ockl_acos_f32f
// CHECK-SAME: (float [[SRC:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
// CHECK-NEXT: store float [[SRC]], ptr [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
@@ -87,7 +85,6 @@ TEST_FUNC_ATTRS float test_ockl_acos_f32(float src) {
// CHECK-LABEL: define internal float @_ZL15test_ockl_fdot2Dv2_DF16_S_fbi
// CHECK-SAME: (<2 x half> [[A:%.*]], <2 x half> [[B:%.*]], float [[C:%.*]], i1 zeroext [[S:%.*]], i32 [[S_INT:%.*]]) #[[ATTR2]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5)
@@ -95,7 +92,6 @@ TEST_FUNC_ATTRS float test_ockl_acos_f32(float src) {
// CHECK-NEXT: [[S_INT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
@@ -106,23 +102,23 @@ TEST_FUNC_ATTRS float test_ockl_acos_f32(float src) {
// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8
-// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[S]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[S_ADDR_ASCAST]], align 1
// CHECK-NEXT: store i32 [[S_INT]], ptr [[S_INT_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
-// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP0]], <2 x half> [[TMP1]], float [[TMP2]], i1 zeroext [[TOBOOL]]) #[[ATTR4]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP0]], <2 x half> [[TMP1]], float [[TMP2]], i1 zeroext [[LOADEDV]]) #[[ATTR4]]
// CHECK-NEXT: store float [[CALL]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[S_INT_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[TMP7]], 0
-// CHECK-NEXT: [[CALL2:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP4]], <2 x half> [[TMP5]], float [[TMP6]], i1 zeroext [[TOBOOL1]]) #[[ATTR4]]
-// CHECK-NEXT: store float [[CALL2]], ptr [[Y_ASCAST]], align 4
+// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
+// CHECK-NEXT: [[CALL1:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP4]], <2 x half> [[TMP5]], float [[TMP6]], i1 zeroext [[TOBOOL]]) #[[ATTR4]]
+// CHECK-NEXT: store float [[CALL1]], ptr [[Y_ASCAST]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[Y_ASCAST]], align 4
// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP8]], [[TMP9]]
diff --git a/clang/test/Headers/amdgcn_openmp_device_math.c b/clang/test/Headers/amdgcn_openmp_device_math.c
index 243d241628b2a..a1ea836cb87ad 100644
--- a/clang/test/Headers/amdgcn_openmp_device_math.c
+++ b/clang/test/Headers/amdgcn_openmp_device_math.c
@@ -14,15 +14,11 @@
// CHECK-C-LABEL: @test_math_f64(
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[RETVAL_I13:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I14:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I9:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[__Y_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[__Z_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I9:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I10:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I4:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I5:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I7:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I4:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[__X_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[X_ADDR:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[Y_ADDR:%.*]] = alloca double, align 8, addrspace(5)
@@ -42,37 +38,33 @@
// CHECK-C-NEXT: store double [[Y:%.*]], ptr [[Y_ADDR_ASCAST]], align 8
// CHECK-C-NEXT: store double [[Z:%.*]], ptr [[Z_ADDR_ASCAST]], align 8
// CHECK-C-NEXT: [[TMP0:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-C-NEXT: store double [[TMP0]], ptr [[__X_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: [[TMP1:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: [[CALL_I:%.*]] = call double @__ocml_sin_f64(double noundef [[TMP1]]) #[[ATTR3:[0-9]+]]
// CHECK-C-NEXT: store double [[CALL_I]], ptr [[L1_ASCAST]], align 8
// CHECK-C-NEXT: [[TMP2:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I6:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I4]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I7:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I5]] to ptr
-// CHECK-C-NEXT: store double [[TMP2]], ptr [[__X_ADDR_ASCAST_I7]], align 8
-// CHECK-C-NEXT: [[TMP3:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I7]], align 8
-// CHECK-C-NEXT: [[CALL_I8:%.*]] = call double @__ocml_cos_f64(double noundef [[TMP3]]) #[[ATTR3]]
-// CHECK-C-NEXT: store double [[CALL_I8]], ptr [[L2_ASCAST]], align 8
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I5:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I4]] to ptr
+// CHECK-C-NEXT: store double [[TMP2]], ptr [[__X_ADDR_ASCAST_I5]], align 8
+// CHECK-C-NEXT: [[TMP3:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I5]], align 8
+// CHECK-C-NEXT: [[CALL_I6:%.*]] = call double @__ocml_cos_f64(double noundef [[TMP3]]) #[[ATTR3]]
+// CHECK-C-NEXT: store double [[CALL_I6]], ptr [[L2_ASCAST]], align 8
// CHECK-C-NEXT: [[TMP4:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I11:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I9]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I12:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I10]] to ptr
-// CHECK-C-NEXT: store double [[TMP4]], ptr [[__X_ADDR_ASCAST_I12]], align 8
-// CHECK-C-NEXT: [[TMP5:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I12]], align 8
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I8:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I7]] to ptr
+// CHECK-C-NEXT: store double [[TMP4]], ptr [[__X_ADDR_ASCAST_I8]], align 8
+// CHECK-C-NEXT: [[TMP5:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I8]], align 8
// CHECK-C-NEXT: [[TMP6:%.*]] = call double @llvm.fabs.f64(double [[TMP5]])
// CHECK-C-NEXT: store double [[TMP6]], ptr [[L3_ASCAST]], align 8
// CHECK-C-NEXT: [[TMP7:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
// CHECK-C-NEXT: [[TMP8:%.*]] = load double, ptr [[Y_ADDR_ASCAST]], align 8
// CHECK-C-NEXT: [[TMP9:%.*]] = load double, ptr [[Z_ADDR_ASCAST]], align 8
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I15:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I13]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I16:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I14]] to ptr
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I10:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I9]] to ptr
// CHECK-C-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-C-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
-// CHECK-C-NEXT: store double [[TMP7]], ptr [[__X_ADDR_ASCAST_I16]], align 8
+// CHECK-C-NEXT: store double [[TMP7]], ptr [[__X_ADDR_ASCAST_I10]], align 8
// CHECK-C-NEXT: store double [[TMP8]], ptr [[__Y_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: store double [[TMP9]], ptr [[__Z_ADDR_ASCAST_I]], align 8
-// CHECK-C-NEXT: [[TMP10:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I16]], align 8
+// CHECK-C-NEXT: [[TMP10:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I10]], align 8
// CHECK-C-NEXT: [[TMP11:%.*]] = load double, ptr [[__Y_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: [[TMP12:%.*]] = load double, ptr [[__Z_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: [[TMP13:%.*]] = call double @llvm.fma.f64(double [[TMP10]], double [[TMP11]], double [[TMP12]])
@@ -81,15 +73,11 @@
//
// CHECK-CPP-LABEL: @_Z13test_math_f64ddd(
// CHECK-CPP-NEXT: entry:
-// CHECK-CPP-NEXT: [[RETVAL_I13:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I14:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I9:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-CPP-NEXT: [[__Y_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-CPP-NEXT: [[__Z_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I9:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I10:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I4:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I5:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I7:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I4:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-CPP-NEXT: [[__X_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-CPP-NEXT: [[X_ADDR:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-CPP-NEXT: [[Y_ADDR:%.*]] = alloca double, align 8, addrspace(5)
@@ -109,37 +97,33 @@
// CHECK-CPP-NEXT: store double [[Y:%.*]], ptr [[Y_ADDR_ASCAST]], align 8
// CHECK-CPP-NEXT: store double [[Z:%.*]], ptr [[Z_ADDR_ASCAST]], align 8
// CHECK-CPP-NEXT: [[TMP0:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-CPP-NEXT: store double [[TMP0]], ptr [[__X_ADDR_ASCAST_I]], align 8
// CHECK-CPP-NEXT: [[TMP1:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I]], align 8
// CHECK-CPP-NEXT: [[CALL_I:%.*]] = call noundef double @__ocml_sin_f64(double noundef [[TMP1]]) #[[ATTR3:[0-9]+]]
// CHECK-CPP-NEXT: store double [[CALL_I]], ptr [[L1_ASCAST]], align 8
// CHECK-CPP-NEXT: [[TMP2:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I6:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I4]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I7:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I5]] to ptr
-// CHECK-CPP-NEXT: store double [[TMP2]], ptr [[__X_ADDR_ASCAST_I7]], align 8
-// CHECK-CPP-NEXT: [[TMP3:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I7]], align 8
-// CHECK-CPP-NEXT: [[CALL_I8:%.*]] = call noundef double @__ocml_cos_f64(double noundef [[TMP3]]) #[[ATTR3]]
-// CHECK-CPP-NEXT: store double [[CALL_I8]], ptr [[L2_ASCAST]], align 8
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I5:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I4]] to ptr
+// CHECK-CPP-NEXT: store double [[TMP2]], ptr [[__X_ADDR_ASCAST_I5]], align 8
+// CHECK-CPP-NEXT: [[TMP3:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I5]], align 8
+// CHECK-CPP-NEXT: [[CALL_I6:%.*]] = call noundef double @__ocml_cos_f64(double noundef [[TMP3]]) #[[ATTR3]]
+// CHECK-CPP-NEXT: store double [[CALL_I6]], ptr [[L2_ASCAST]], align 8
// CHECK-CPP-NEXT: [[TMP4:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I11:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I9]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I12:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I10]] to ptr
-// CHECK-CPP-NEXT: store double [[TMP4]], ptr [[__X_ADDR_ASCAST_I12]], align 8
-// CHECK-CPP-NEXT: [[TMP5:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I12]], align 8
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I8:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I7]] to ptr
+// CHECK-CPP-NEXT: store double [[TMP4]], ptr [[__X_ADDR_ASCAST_I8]], align 8
+// CHECK-CPP-NEXT: [[TMP5:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I8]], align 8
// CHECK-CPP-NEXT: [[TMP6:%.*]] = call noundef double @llvm.fabs.f64(double [[TMP5]])
// CHECK-CPP-NEXT: store double [[TMP6]], ptr [[L3_ASCAST]], align 8
// CHECK-CPP-NEXT: [[TMP7:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
// CHECK-CPP-NEXT: [[TMP8:%.*]] = load double, ptr [[Y_ADDR_ASCAST]], align 8
// CHECK-CPP-NEXT: [[TMP9:%.*]] = load double, ptr [[Z_ADDR_ASCAST]], align 8
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I15:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I13]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I16:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I14]] to ptr
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I10:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I9]] to ptr
// CHECK-CPP-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-CPP-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
-// CHECK-CPP-NEXT: store double [[TMP7]], ptr [[__X_ADDR_ASCAST_I16]], align 8
+// CHECK-CPP-NEXT: store double [[TMP7]], ptr [[__X_ADDR_ASCAST_I10]], align 8
// CHECK-CPP-NEXT: store double [[TMP8]], ptr [[__Y_ADDR_ASCAST_I]], align 8
// CHECK-CPP-NEXT: store double [[TMP9]], ptr [[__Z_ADDR_ASCAST_I]], align 8
-// CHECK-CPP-NEXT: [[TMP10:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I16]], align 8
+// CHECK-CPP-NEXT: [[TMP10:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I10]], align 8
// CHECK-CPP-NEXT: [[TMP11:%.*]] = load double, ptr [[__Y_ADDR_ASCAST_I]], align 8
// CHECK-CPP-NEXT: [[TMP12:%.*]] = load double, ptr [[__Z_ADDR_ASCAST_I]], align 8
// CHECK-CPP-NEXT: [[TMP13:%.*]] = call noundef double @llvm.fma.f64(double [[TMP10]], double [[TMP11]], double [[TMP12]])
@@ -155,15 +139,11 @@ void test_math_f64(double x, double y, double z) {
// CHECK-C-LABEL: @test_math_f32(
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[RETVAL_I22:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I23:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I18:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[__Y_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[__Z_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I18:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I19:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I13:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I14:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I16:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I13:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[__X_ADDR_I:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-C-NEXT: [[X_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-C-NEXT: [[Y_ADDR:%.*]] = alloca float, align 4, addrspace(5)
@@ -184,7 +164,6 @@ void test_math_f64(double x, double y, double z) {
// CHECK-C-NEXT: store float [[Z:%.*]], ptr [[Z_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP0:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-C-NEXT: store double [[CONV]], ptr [[__X_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: [[TMP1:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I]], align 8
@@ -193,19 +172,17 @@ void test_math_f64(double x, double y, double z) {
// CHECK-C-NEXT: store float [[CONV1]], ptr [[L1_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP2:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: [[CONV2:%.*]] = fpext float [[TMP2]] to double
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I15:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I13]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I16:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I14]] to ptr
-// CHECK-C-NEXT: store double [[CONV2]], ptr [[__X_ADDR_ASCAST_I16]], align 8
-// CHECK-C-NEXT: [[TMP3:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I16]], align 8
-// CHECK-C-NEXT: [[CALL_I17:%.*]] = call double @__ocml_cos_f64(double noundef [[TMP3]]) #[[ATTR3]]
-// CHECK-C-NEXT: [[CONV4:%.*]] = fptrunc double [[CALL_I17]] to float
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I14:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I13]] to ptr
+// CHECK-C-NEXT: store double [[CONV2]], ptr [[__X_ADDR_ASCAST_I14]], align 8
+// CHECK-C-NEXT: [[TMP3:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I14]], align 8
+// CHECK-C-NEXT: [[CALL_I15:%.*]] = call double @__ocml_cos_f64(double noundef [[TMP3]]) #[[ATTR3]]
+// CHECK-C-NEXT: [[CONV4:%.*]] = fptrunc double [[CALL_I15]] to float
// CHECK-C-NEXT: store float [[CONV4]], ptr [[L2_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP4:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: [[CONV5:%.*]] = fpext float [[TMP4]] to double
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I20:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I18]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I21:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I19]] to ptr
-// CHECK-C-NEXT: store double [[CONV5]], ptr [[__X_ADDR_ASCAST_I21]], align 8
-// CHECK-C-NEXT: [[TMP5:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I21]], align 8
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I17:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I16]] to ptr
+// CHECK-C-NEXT: store double [[CONV5]], ptr [[__X_ADDR_ASCAST_I17]], align 8
+// CHECK-C-NEXT: [[TMP5:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I17]], align 8
// CHECK-C-NEXT: [[TMP6:%.*]] = call double @llvm.fabs.f64(double [[TMP5]])
// CHECK-C-NEXT: [[CONV7:%.*]] = fptrunc double [[TMP6]] to float
// CHECK-C-NEXT: store float [[CONV7]], ptr [[L3_ASCAST]], align 4
@@ -215,14 +192,13 @@ void test_math_f64(double x, double y, double z) {
// CHECK-C-NEXT: [[CONV9:%.*]] = fpext float [[TMP8]] to double
// CHECK-C-NEXT: [[TMP9:%.*]] = load float, ptr [[Z_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: [[CONV10:%.*]] = fpext float [[TMP9]] to double
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I24:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I22]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I25:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I23]] to ptr
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I19:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I18]] to ptr
// CHECK-C-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-C-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
-// CHECK-C-NEXT: store double [[CONV8]], ptr [[__X_ADDR_ASCAST_I25]], align 8
+// CHECK-C-NEXT: store double [[CONV8]], ptr [[__X_ADDR_ASCAST_I19]], align 8
// CHECK-C-NEXT: store double [[CONV9]], ptr [[__Y_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: store double [[CONV10]], ptr [[__Z_ADDR_ASCAST_I]], align 8
-// CHECK-C-NEXT: [[TMP10:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I25]], align 8
+// CHECK-C-NEXT: [[TMP10:%.*]] = load double, ptr [[__X_ADDR_ASCAST_I19]], align 8
// CHECK-C-NEXT: [[TMP11:%.*]] = load double, ptr [[__Y_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: [[TMP12:%.*]] = load double, ptr [[__Z_ADDR_ASCAST_I]], align 8
// CHECK-C-NEXT: [[TMP13:%.*]] = call double @llvm.fma.f64(double [[TMP10]], double [[TMP11]], double [[TMP12]])
@@ -232,25 +208,17 @@ void test_math_f64(double x, double y, double z) {
//
// CHECK-CPP-LABEL: @_Z13test_math_f32fff(
// CHECK-CPP-NEXT: entry:
-// CHECK-CPP-NEXT: [[RETVAL_I32:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I33:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__Y_ADDR_I34:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__Z_ADDR_I35:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I28:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I29:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I24:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I25:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I19:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[__X_ADDR_I20:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I14:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I15:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__Y_ADDR_I21:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__Z_ADDR_I22:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I18:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I16:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I13:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I10:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[__Z_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I9:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I10:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I4:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I5:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I7:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I4:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[X_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[Y_ADDR:%.*]] = alloca float, align 4, addrspace(5)
@@ -270,61 +238,53 @@ void test_math_f64(double x, double y, double z) {
// CHECK-CPP-NEXT: store float [[Y:%.*]], ptr [[Y_ADDR_ASCAST]], align 4
// CHECK-CPP-NEXT: store float [[Z:%.*]], ptr [[Z_ADDR_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP0:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-CPP-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-CPP-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I21:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I19]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I22:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I20]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP1]], ptr [[__X_ADDR_ASCAST_I22]], align 4
-// CHECK-CPP-NEXT: [[TMP2:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I22]], align 4
-// CHECK-CPP-NEXT: [[CALL_I23:%.*]] = call noundef float @__ocml_sin_f32(float noundef [[TMP2]]) #[[ATTR3]]
-// CHECK-CPP-NEXT: store float [[CALL_I23]], ptr [[L1_ASCAST]], align 4
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I14:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I13]] to ptr
+// CHECK-CPP-NEXT: store float [[TMP1]], ptr [[__X_ADDR_ASCAST_I14]], align 4
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I14]], align 4
+// CHECK-CPP-NEXT: [[CALL_I15:%.*]] = call noundef float @__ocml_sin_f32(float noundef [[TMP2]]) #[[ATTR3]]
+// CHECK-CPP-NEXT: store float [[CALL_I15]], ptr [[L1_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP3:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I6:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I4]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I7:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I5]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP3]], ptr [[__X_ADDR_ASCAST_I7]], align 4
-// CHECK-CPP-NEXT: [[TMP4:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I7]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I26:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I24]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I27:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I25]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP4]], ptr [[__X_ADDR_ASCAST_I27]], align 4
-// CHECK-CPP-NEXT: [[TMP5:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I27]], align 4
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I5:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I4]] to ptr
+// CHECK-CPP-NEXT: store float [[TMP3]], ptr [[__X_ADDR_ASCAST_I5]], align 4
+// CHECK-CPP-NEXT: [[TMP4:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I5]], align 4
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I17:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I16]] to ptr
+// CHECK-CPP-NEXT: store float [[TMP4]], ptr [[__X_ADDR_ASCAST_I17]], align 4
+// CHECK-CPP-NEXT: [[TMP5:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I17]], align 4
// CHECK-CPP-NEXT: [[CALL_I:%.*]] = call noundef float @__ocml_cos_f32(float noundef [[TMP5]]) #[[ATTR3]]
// CHECK-CPP-NEXT: store float [[CALL_I]], ptr [[L2_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP6:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I11:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I9]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I12:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I10]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP6]], ptr [[__X_ADDR_ASCAST_I12]], align 4
-// CHECK-CPP-NEXT: [[TMP7:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I12]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I30:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I28]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I31:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I29]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP7]], ptr [[__X_ADDR_ASCAST_I31]], align 4
-// CHECK-CPP-NEXT: [[TMP8:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I31]], align 4
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I8:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I7]] to ptr
+// CHECK-CPP-NEXT: store float [[TMP6]], ptr [[__X_ADDR_ASCAST_I8]], align 4
+// CHECK-CPP-NEXT: [[TMP7:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I8]], align 4
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I19:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I18]] to ptr
+// CHECK-CPP-NEXT: store float [[TMP7]], ptr [[__X_ADDR_ASCAST_I19]], align 4
+// CHECK-CPP-NEXT: [[TMP8:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I19]], align 4
// CHECK-CPP-NEXT: [[TMP9:%.*]] = call noundef float @llvm.fabs.f32(float [[TMP8]])
// CHECK-CPP-NEXT: store float [[TMP9]], ptr [[L3_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP10:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP11:%.*]] = load float, ptr [[Y_ADDR_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP12:%.*]] = load float, ptr [[Z_ADDR_ASCAST]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I16:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I14]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I17:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I15]] to ptr
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I11:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I10]] to ptr
// CHECK-CPP-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-CPP-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP10]], ptr [[__X_ADDR_ASCAST_I17]], align 4
+// CHECK-CPP-NEXT: store float [[TMP10]], ptr [[__X_ADDR_ASCAST_I11]], align 4
// CHECK-CPP-NEXT: store float [[TMP11]], ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-CPP-NEXT: store float [[TMP12]], ptr [[__Z_ADDR_ASCAST_I]], align 4
-// CHECK-CPP-NEXT: [[TMP13:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I17]], align 4
+// CHECK-CPP-NEXT: [[TMP13:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I11]], align 4
// CHECK-CPP-NEXT: [[TMP14:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-CPP-NEXT: [[TMP15:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I36:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I32]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I37:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I33]] to ptr
-// CHECK-CPP-NEXT: [[__Y_ADDR_ASCAST_I38:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I34]] to ptr
-// CHECK-CPP-NEXT: [[__Z_ADDR_ASCAST_I39:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I35]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP13]], ptr [[__X_ADDR_ASCAST_I37]], align 4
-// CHECK-CPP-NEXT: store float [[TMP14]], ptr [[__Y_ADDR_ASCAST_I38]], align 4
-// CHECK-CPP-NEXT: store float [[TMP15]], ptr [[__Z_ADDR_ASCAST_I39]], align 4
-// CHECK-CPP-NEXT: [[TMP16:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I37]], align 4
-// CHECK-CPP-NEXT: [[TMP17:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I38]], align 4
-// CHECK-CPP-NEXT: [[TMP18:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I39]], align 4
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I23:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I20]] to ptr
+// CHECK-CPP-NEXT: [[__Y_ADDR_ASCAST_I24:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I21]] to ptr
+// CHECK-CPP-NEXT: [[__Z_ADDR_ASCAST_I25:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I22]] to ptr
+// CHECK-CPP-NEXT: store float [[TMP13]], ptr [[__X_ADDR_ASCAST_I23]], align 4
+// CHECK-CPP-NEXT: store float [[TMP14]], ptr [[__Y_ADDR_ASCAST_I24]], align 4
+// CHECK-CPP-NEXT: store float [[TMP15]], ptr [[__Z_ADDR_ASCAST_I25]], align 4
+// CHECK-CPP-NEXT: [[TMP16:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I23]], align 4
+// CHECK-CPP-NEXT: [[TMP17:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I24]], align 4
+// CHECK-CPP-NEXT: [[TMP18:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I25]], align 4
// CHECK-CPP-NEXT: [[TMP19:%.*]] = call noundef float @llvm.fma.f32(float [[TMP16]], float [[TMP17]], float [[TMP18]])
// CHECK-CPP-NEXT: store float [[TMP19]], ptr [[L4_ASCAST]], align 4
// CHECK-CPP-NEXT: ret void
@@ -338,15 +298,11 @@ void test_math_f32(float x, float y, float z) {
// CHECK-C-LABEL: @test_math_f32_suffix(
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[RETVAL_I13:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I14:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I9:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-C-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-C-NEXT: [[__Z_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I9:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I10:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I4:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-C-NEXT: [[__X_ADDR_I5:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I7:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-C-NEXT: [[__X_ADDR_I4:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-C-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-C-NEXT: [[X_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-C-NEXT: [[Y_ADDR:%.*]] = alloca float, align 4, addrspace(5)
@@ -366,37 +322,33 @@ void test_math_f32(float x, float y, float z) {
// CHECK-C-NEXT: store float [[Y:%.*]], ptr [[Y_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: store float [[Z:%.*]], ptr [[Z_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP0:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-C-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-C-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-C-NEXT: [[CALL_I:%.*]] = call float @__ocml_sin_f32(float noundef [[TMP1]]) #[[ATTR3]]
// CHECK-C-NEXT: store float [[CALL_I]], ptr [[L1_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP2:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I6:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I4]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I7:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I5]] to ptr
-// CHECK-C-NEXT: store float [[TMP2]], ptr [[__X_ADDR_ASCAST_I7]], align 4
-// CHECK-C-NEXT: [[TMP3:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I7]], align 4
-// CHECK-C-NEXT: [[CALL_I8:%.*]] = call float @__ocml_cos_f32(float noundef [[TMP3]]) #[[ATTR3]]
-// CHECK-C-NEXT: store float [[CALL_I8]], ptr [[L2_ASCAST]], align 4
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I5:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I4]] to ptr
+// CHECK-C-NEXT: store float [[TMP2]], ptr [[__X_ADDR_ASCAST_I5]], align 4
+// CHECK-C-NEXT: [[TMP3:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I5]], align 4
+// CHECK-C-NEXT: [[CALL_I6:%.*]] = call float @__ocml_cos_f32(float noundef [[TMP3]]) #[[ATTR3]]
+// CHECK-C-NEXT: store float [[CALL_I6]], ptr [[L2_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP4:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I11:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I9]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I12:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I10]] to ptr
-// CHECK-C-NEXT: store float [[TMP4]], ptr [[__X_ADDR_ASCAST_I12]], align 4
-// CHECK-C-NEXT: [[TMP5:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I12]], align 4
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I8:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I7]] to ptr
+// CHECK-C-NEXT: store float [[TMP4]], ptr [[__X_ADDR_ASCAST_I8]], align 4
+// CHECK-C-NEXT: [[TMP5:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I8]], align 4
// CHECK-C-NEXT: [[TMP6:%.*]] = call float @llvm.fabs.f32(float [[TMP5]])
// CHECK-C-NEXT: store float [[TMP6]], ptr [[L3_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP7:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP8:%.*]] = load float, ptr [[Y_ADDR_ASCAST]], align 4
// CHECK-C-NEXT: [[TMP9:%.*]] = load float, ptr [[Z_ADDR_ASCAST]], align 4
-// CHECK-C-NEXT: [[RETVAL_ASCAST_I15:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I13]] to ptr
-// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I16:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I14]] to ptr
+// CHECK-C-NEXT: [[__X_ADDR_ASCAST_I10:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I9]] to ptr
// CHECK-C-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-C-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
-// CHECK-C-NEXT: store float [[TMP7]], ptr [[__X_ADDR_ASCAST_I16]], align 4
+// CHECK-C-NEXT: store float [[TMP7]], ptr [[__X_ADDR_ASCAST_I10]], align 4
// CHECK-C-NEXT: store float [[TMP8]], ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-C-NEXT: store float [[TMP9]], ptr [[__Z_ADDR_ASCAST_I]], align 4
-// CHECK-C-NEXT: [[TMP10:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I16]], align 4
+// CHECK-C-NEXT: [[TMP10:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I10]], align 4
// CHECK-C-NEXT: [[TMP11:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-C-NEXT: [[TMP12:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I]], align 4
// CHECK-C-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]])
@@ -405,15 +357,11 @@ void test_math_f32(float x, float y, float z) {
//
// CHECK-CPP-LABEL: @_Z20test_math_f32_suffixfff(
// CHECK-CPP-NEXT: entry:
-// CHECK-CPP-NEXT: [[RETVAL_I13:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I14:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I9:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[__Z_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I9:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I10:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I4:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[__X_ADDR_I5:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-CPP-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I7:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-CPP-NEXT: [[__X_ADDR_I4:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[X_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-CPP-NEXT: [[Y_ADDR:%.*]] = alloca float, align 4, addrspace(5)
@@ -433,37 +381,33 @@ void test_math_f32(float x, float y, float z) {
// CHECK-CPP-NEXT: store float [[Y:%.*]], ptr [[Y_ADDR_ASCAST]], align 4
// CHECK-CPP-NEXT: store float [[Z:%.*]], ptr [[Z_ADDR_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP0:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-CPP-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-CPP-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-CPP-NEXT: [[CALL_I:%.*]] = call noundef float @__ocml_sin_f32(float noundef [[TMP1]]) #[[ATTR3]]
// CHECK-CPP-NEXT: store float [[CALL_I]], ptr [[L1_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP2:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I6:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I4]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I7:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I5]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP2]], ptr [[__X_ADDR_ASCAST_I7]], align 4
-// CHECK-CPP-NEXT: [[TMP3:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I7]], align 4
-// CHECK-CPP-NEXT: [[CALL_I8:%.*]] = call noundef float @__ocml_cos_f32(float noundef [[TMP3]]) #[[ATTR3]]
-// CHECK-CPP-NEXT: store float [[CALL_I8]], ptr [[L2_ASCAST]], align 4
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I5:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I4]] to ptr
+// CHECK-CPP-NEXT: store float [[TMP2]], ptr [[__X_ADDR_ASCAST_I5]], align 4
+// CHECK-CPP-NEXT: [[TMP3:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I5]], align 4
+// CHECK-CPP-NEXT: [[CALL_I6:%.*]] = call noundef float @__ocml_cos_f32(float noundef [[TMP3]]) #[[ATTR3]]
+// CHECK-CPP-NEXT: store float [[CALL_I6]], ptr [[L2_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP4:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I11:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I9]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I12:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I10]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP4]], ptr [[__X_ADDR_ASCAST_I12]], align 4
-// CHECK-CPP-NEXT: [[TMP5:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I12]], align 4
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I8:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I7]] to ptr
+// CHECK-CPP-NEXT: store float [[TMP4]], ptr [[__X_ADDR_ASCAST_I8]], align 4
+// CHECK-CPP-NEXT: [[TMP5:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I8]], align 4
// CHECK-CPP-NEXT: [[TMP6:%.*]] = call noundef float @llvm.fabs.f32(float [[TMP5]])
// CHECK-CPP-NEXT: store float [[TMP6]], ptr [[L3_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP7:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP8:%.*]] = load float, ptr [[Y_ADDR_ASCAST]], align 4
// CHECK-CPP-NEXT: [[TMP9:%.*]] = load float, ptr [[Z_ADDR_ASCAST]], align 4
-// CHECK-CPP-NEXT: [[RETVAL_ASCAST_I15:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I13]] to ptr
-// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I16:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I14]] to ptr
+// CHECK-CPP-NEXT: [[__X_ADDR_ASCAST_I10:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I9]] to ptr
// CHECK-CPP-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-CPP-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
-// CHECK-CPP-NEXT: store float [[TMP7]], ptr [[__X_ADDR_ASCAST_I16]], align 4
+// CHECK-CPP-NEXT: store float [[TMP7]], ptr [[__X_ADDR_ASCAST_I10]], align 4
// CHECK-CPP-NEXT: store float [[TMP8]], ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-CPP-NEXT: store float [[TMP9]], ptr [[__Z_ADDR_ASCAST_I]], align 4
-// CHECK-CPP-NEXT: [[TMP10:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I16]], align 4
+// CHECK-CPP-NEXT: [[TMP10:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I10]], align 4
// CHECK-CPP-NEXT: [[TMP11:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-CPP-NEXT: [[TMP12:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I]], align 4
// CHECK-CPP-NEXT: [[TMP13:%.*]] = call noundef float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]])
diff --git a/clang/test/Headers/amdgcn_openmp_device_math_c.c b/clang/test/Headers/amdgcn_openmp_device_math_c.c
index 0eb5edeb5ba6c..5699e50d824db 100644
--- a/clang/test/Headers/amdgcn_openmp_device_math_c.c
+++ b/clang/test/Headers/amdgcn_openmp_device_math_c.c
@@ -8,7 +8,6 @@
// CHECK-LABEL: @test_math_int(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[__SGN_I:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
@@ -17,7 +16,6 @@
// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr
// CHECK-NEXT: store i32 [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__SGN_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__SGN_I]] to ptr
// CHECK-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_ASCAST_I]], align 4
@@ -38,7 +36,6 @@ void test_math_int(int x) {
// CHECK-LABEL: @test_math_long(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: [[__SGN_I:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
@@ -47,7 +44,6 @@ void test_math_int(int x) {
// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr
// CHECK-NEXT: store i64 [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__SGN_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__SGN_I]] to ptr
// CHECK-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_ASCAST_I]], align 8
@@ -68,7 +64,6 @@ void test_math_long(long x) {
// CHECK-LABEL: @test_math_long_long(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: [[__SGN_I:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
@@ -77,7 +72,6 @@ void test_math_long(long x) {
// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr
// CHECK-NEXT: store i64 [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__SGN_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__SGN_I]] to ptr
// CHECK-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_ASCAST_I]], align 8
diff --git a/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp b/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp
index 0fdc02edc1508..d2235f94f93cb 100644
--- a/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp
+++ b/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp
@@ -38,9 +38,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
@@ -52,15 +50,11 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
-// CHECK-NEXT: [[RETVAL_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I_I]] to ptr
// CHECK-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I_I]], align 4
@@ -72,9 +66,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
@@ -86,15 +78,11 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
-// CHECK-NEXT: [[RETVAL_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I_I]] to ptr
// CHECK-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I_I]], align 4
@@ -106,9 +94,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
@@ -120,15 +106,11 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.5
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
-// CHECK-NEXT: [[RETVAL_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I_I]] to ptr
// CHECK-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I_I]], align 4
@@ -140,11 +122,9 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.6
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Z_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
@@ -162,15 +142,12 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.7
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Z_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Z_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
@@ -180,7 +157,6 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I]], align 4
-// CHECK-NEXT: [[RETVAL_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I_I]] to ptr
// CHECK-NEXT: [[__Z_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I_I]] to ptr
@@ -198,10 +174,8 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.8
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
@@ -216,10 +190,8 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.9
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
@@ -250,10 +222,8 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.12
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
@@ -268,10 +238,8 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.13
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
diff --git a/clang/test/Headers/gpu_disabled_math.cpp b/clang/test/Headers/gpu_disabled_math.cpp
index 6e697f52120ae..8d0667eae6f97 100644
--- a/clang/test/Headers/gpu_disabled_math.cpp
+++ b/clang/test/Headers/gpu_disabled_math.cpp
@@ -18,9 +18,7 @@ extern "C" double sin(double x);
// AMDGPU-LABEL: define dso_local noundef double @_Z3food(
// AMDGPU-SAME: double noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
// AMDGPU-NEXT: [[X_ADDR:%.*]] = alloca double, align 8, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// AMDGPU-NEXT: store double [[X]], ptr [[X_ADDR_ASCAST]], align 8
// AMDGPU-NEXT: [[TMP0:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c
index 891a5abf7a72a..a9b9889b280ae 100644
--- a/clang/test/Headers/gpuintrin.c
+++ b/clang/test/Headers/gpuintrin.c
@@ -82,8 +82,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_x(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 12
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG3:![0-9]+]], !invariant.load [[META4:![0-9]+]]
@@ -98,8 +96,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_y(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 16
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG3]], !invariant.load [[META4]]
@@ -114,8 +110,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_z(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 20
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG3]], !invariant.load [[META4]]
@@ -132,7 +126,6 @@ __gpu_kernel void foo() {
// AMDGPU-NEXT: [[ENTRY:.*:]]
// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__DIM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__DIM_ADDR]] to ptr
// AMDGPU-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR_ASCAST]], align 4
// AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR_ASCAST]], align 4
@@ -143,29 +136,27 @@ __gpu_kernel void foo() {
// AMDGPU-NEXT: ]
// AMDGPU: [[SW_BB]]:
// AMDGPU-NEXT: [[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN:.*]]
// AMDGPU: [[SW_BB1]]:
// AMDGPU-NEXT: [[CALL2:%.*]] = call i32 @__gpu_num_blocks_y() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL2]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL2]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[SW_BB3]]:
// AMDGPU-NEXT: [[CALL4:%.*]] = call i32 @__gpu_num_blocks_z() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[SW_DEFAULT]]:
-// AMDGPU-NEXT: store i32 1, ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 1, ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[RETURN]]:
-// AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: ret i32 [[TMP1]]
//
//
// AMDGPU-LABEL: define internal i32 @__gpu_block_id_x(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
// AMDGPU-NEXT: ret i32 [[TMP0]]
//
@@ -173,8 +164,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_block_id_y(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
// AMDGPU-NEXT: ret i32 [[TMP0]]
//
@@ -182,8 +171,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_block_id_z(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
// AMDGPU-NEXT: ret i32 [[TMP0]]
//
@@ -193,7 +180,6 @@ __gpu_kernel void foo() {
// AMDGPU-NEXT: [[ENTRY:.*:]]
// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__DIM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__DIM_ADDR]] to ptr
// AMDGPU-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR_ASCAST]], align 4
// AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR_ASCAST]], align 4
@@ -204,29 +190,27 @@ __gpu_kernel void foo() {
// AMDGPU-NEXT: ]
// AMDGPU: [[SW_BB]]:
// AMDGPU-NEXT: [[CALL:%.*]] = call i32 @__gpu_block_id_x() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN:.*]]
// AMDGPU: [[SW_BB1]]:
// AMDGPU-NEXT: [[CALL2:%.*]] = call i32 @__gpu_block_id_y() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL2]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL2]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[SW_BB3]]:
// AMDGPU-NEXT: [[CALL4:%.*]] = call i32 @__gpu_block_id_z() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[SW_DEFAULT]]:
-// AMDGPU-NEXT: store i32 0, ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[RETURN]]:
-// AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: ret i32 [[TMP1]]
//
//
// AMDGPU-LABEL: define internal i32 @__gpu_num_threads_x(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 12
// AMDGPU-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG5]], !invariant.load [[META4]], !noundef [[META4]]
@@ -237,8 +221,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_num_threads_y(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 14
// AMDGPU-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG5]], !invariant.load [[META4]], !noundef [[META4]]
@@ -249,8 +231,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_num_threads_z(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 16
// AMDGPU-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG5]], !invariant.load [[META4]], !noundef [[META4]]
@@ -263,7 +243,6 @@ __gpu_kernel void foo() {
// AMDGPU-NEXT: [[ENTRY:.*:]]
// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__DIM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__DIM_ADDR]] to ptr
// AMDGPU-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR_ASCAST]], align 4
// AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR_ASCAST]], align 4
@@ -274,29 +253,27 @@ __gpu_kernel void foo() {
// AMDGPU-NEXT: ]
// AMDGPU: [[SW_BB]]:
// AMDGPU-NEXT: [[CALL:%.*]] = call i32 @__gpu_num_threads_x() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN:.*]]
// AMDGPU: [[SW_BB1]]:
// AMDGPU-NEXT: [[CALL2:%.*]] = call i32 @__gpu_num_threads_y() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL2]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL2]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[SW_BB3]]:
// AMDGPU-NEXT: [[CALL4:%.*]] = call i32 @__gpu_num_threads_z() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[SW_DEFAULT]]:
-// AMDGPU-NEXT: store i32 1, ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 1, ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[RETURN]]:
-// AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: ret i32 [[TMP1]]
//
//
// AMDGPU-LABEL: define internal i32 @__gpu_thread_id_x(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
// AMDGPU-NEXT: ret i32 [[TMP0]]
//
@@ -304,8 +281,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_thread_id_y(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
// AMDGPU-NEXT: ret i32 [[TMP0]]
//
@@ -313,8 +288,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_thread_id_z(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
// AMDGPU-NEXT: ret i32 [[TMP0]]
//
@@ -324,7 +297,6 @@ __gpu_kernel void foo() {
// AMDGPU-NEXT: [[ENTRY:.*:]]
// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__DIM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__DIM_ADDR]] to ptr
// AMDGPU-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR_ASCAST]], align 4
// AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR_ASCAST]], align 4
@@ -335,29 +307,27 @@ __gpu_kernel void foo() {
// AMDGPU-NEXT: ]
// AMDGPU: [[SW_BB]]:
// AMDGPU-NEXT: [[CALL:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN:.*]]
// AMDGPU: [[SW_BB1]]:
// AMDGPU-NEXT: [[CALL2:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL2]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL2]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[SW_BB3]]:
// AMDGPU-NEXT: [[CALL4:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR8]]
-// AMDGPU-NEXT: store i32 [[CALL4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 [[CALL4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[SW_DEFAULT]]:
-// AMDGPU-NEXT: store i32 0, ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: br label %[[RETURN]]
// AMDGPU: [[RETURN]]:
-// AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL_ASCAST]], align 4
+// AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[RETVAL]], align 4
// AMDGPU-NEXT: ret i32 [[TMP1]]
//
//
// AMDGPU-LABEL: define internal i32 @__gpu_num_lanes(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.wavefrontsize()
// AMDGPU-NEXT: ret i32 [[TMP0]]
//
@@ -365,8 +335,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_lane_id(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
// AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP0]])
// AMDGPU-NEXT: ret i32 [[TMP1]]
@@ -375,8 +343,6 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i64 @__gpu_lane_mask(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
// AMDGPU-NEXT: ret i64 [[TMP0]]
//
@@ -384,10 +350,8 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_read_first_lane_u32(
// AMDGPU-SAME: i64 noundef [[__LANE_MASK:%.*]], i32 noundef [[__X:%.*]]) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// AMDGPU-NEXT: [[__X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__LANE_MASK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__LANE_MASK_ADDR]] to ptr
// AMDGPU-NEXT: [[__X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR]] to ptr
// AMDGPU-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR_ASCAST]], align 8
@@ -400,12 +364,10 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i64 @__gpu_read_first_lane_u64(
// AMDGPU-SAME: i64 noundef [[__LANE_MASK:%.*]], i64 noundef [[__X:%.*]]) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
// AMDGPU-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// AMDGPU-NEXT: [[__X_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// AMDGPU-NEXT: [[__HI:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__LO:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__LANE_MASK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__LANE_MASK_ADDR]] to ptr
// AMDGPU-NEXT: [[__X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR]] to ptr
// AMDGPU-NEXT: [[__HI_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__HI]] to ptr
@@ -437,10 +399,8 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i64 @__gpu_ballot(
// AMDGPU-SAME: i64 noundef [[__LANE_MASK:%.*]], i1 noundef zeroext [[__X:%.*]]) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
// AMDGPU-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// AMDGPU-NEXT: [[__X_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__LANE_MASK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__LANE_MASK_ADDR]] to ptr
// AMDGPU-NEXT: [[__X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR]] to ptr
// AMDGPU-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR_ASCAST]], align 8
@@ -475,13 +435,11 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_shuffle_idx_u32(
// AMDGPU-SAME: i64 noundef [[__LANE_MASK:%.*]], i32 noundef [[__IDX:%.*]], i32 noundef [[__X:%.*]], i32 noundef [[__WIDTH:%.*]]) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// AMDGPU-NEXT: [[__IDX_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__WIDTH_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGPU-NEXT: [[__LANE:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__LANE_MASK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__LANE_MASK_ADDR]] to ptr
// AMDGPU-NEXT: [[__IDX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__IDX_ADDR]] to ptr
// AMDGPU-NEXT: [[__X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR]] to ptr
@@ -509,9 +467,7 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i64 @__gpu_first_lane_id(
// AMDGPU-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
// AMDGPU-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__LANE_MASK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__LANE_MASK_ADDR]] to ptr
// AMDGPU-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR_ASCAST]], align 8
// AMDGPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR_ASCAST]], align 8
@@ -528,9 +484,7 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal zeroext i1 @__gpu_is_first_in_lane(
// AMDGPU-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5)
// AMDGPU-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[__LANE_MASK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__LANE_MASK_ADDR]] to ptr
// AMDGPU-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR_ASCAST]], align 8
// AMDGPU-NEXT: [[CALL:%.*]] = call i32 @__gpu_lane_id() #[[ATTR8]]
@@ -1422,3 +1376,9 @@ __gpu_kernel void foo() {
// SPIRV-NEXT: [[ENTRY:.*:]]
// SPIRV-NEXT: call void @llvm.trap()
// SPIRV-NEXT: ret void
+//
+//.
+// AMDGPU: [[RNG3]] = !{i32 1, i32 0}
+// AMDGPU: [[META4]] = !{}
+// AMDGPU: [[RNG5]] = !{i16 1, i16 1025}
+//.
diff --git a/clang/test/Headers/gpuintrin_lang.c b/clang/test/Headers/gpuintrin_lang.c
index e3db72d5ff928..433d24b18d654 100644
--- a/clang/test/Headers/gpuintrin_lang.c
+++ b/clang/test/Headers/gpuintrin_lang.c
@@ -49,10 +49,6 @@ extern "C" [[clang::sycl_external]] int foo() { return __gpu_thread_id_x(); }
// HIP-LABEL: define dso_local i32 @foo(
// HIP-SAME: ) #[[ATTR0:[0-9]+]] {
// HIP-NEXT: [[ENTRY:.*:]]
-// HIP-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4, addrspace(5)
-// HIP-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// HIP-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// HIP-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// HIP-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
// HIP-NEXT: ret i32 [[TMP0]]
//
@@ -71,10 +67,6 @@ extern "C" [[clang::sycl_external]] int foo() { return __gpu_thread_id_x(); }
// SYCL-LABEL: define spir_func i32 @foo(
// SYCL-SAME: ) #[[ATTR0:[0-9]+]] {
// SYCL-NEXT: [[ENTRY:.*:]]
-// SYCL-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4
-// SYCL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
-// SYCL-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
-// SYCL-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr [[RETVAL_I]] to ptr addrspace(4)
// SYCL-NEXT: [[SPV_THREAD_ID_IN_GROUP_I:%.*]] = call i64 @llvm.spv.thread.id.in.group.i64(i32 0)
// SYCL-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPV_THREAD_ID_IN_GROUP_I]] to i32
// SYCL-NEXT: ret i32 [[CONV_I]]
@@ -82,10 +74,6 @@ extern "C" [[clang::sycl_external]] int foo() { return __gpu_thread_id_x(); }
// C89-LABEL: define dso_local i32 @foo(
// C89-SAME: ) #[[ATTR0:[0-9]+]] {
// C89-NEXT: [[ENTRY:.*:]]
-// C89-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4, addrspace(5)
-// C89-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// C89-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// C89-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// C89-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
// C89-NEXT: ret i32 [[TMP0]]
//
diff --git a/clang/test/Headers/openmp-device-functions-bool.c b/clang/test/Headers/openmp-device-functions-bool.c
index bb01096027849..3d5ea9009c1d9 100644
--- a/clang/test/Headers/openmp-device-functions-bool.c
+++ b/clang/test/Headers/openmp-device-functions-bool.c
@@ -18,12 +18,10 @@ typedef _Bool ockl_bool;
// CHECK-LABEL: define hidden float @test_fdot2
// CHECK-SAME: (<2 x half> noundef [[A:%.*]], <2 x half> noundef [[B:%.*]], float noundef [[C:%.*]], i1 noundef zeroext [[S:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
@@ -31,14 +29,14 @@ typedef _Bool ockl_bool;
// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8
-// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[S]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[S_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1
-// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
-// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> noundef [[TMP0]], <2 x half> noundef [[TMP1]], float noundef [[TMP2]], i1 noundef zeroext [[TOBOOL]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> noundef [[TMP0]], <2 x half> noundef [[TMP1]], float noundef [[TMP2]], i1 noundef zeroext [[LOADEDV]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret float [[CALL]]
//
EXTERN_C float test_fdot2(__2f16 a, __2f16 b, float c, ockl_bool s) {
@@ -56,11 +54,9 @@ enum my_bool {
// CHECK-C-LABEL: define hidden i32 @use_my_bool
// CHECK-C-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-C-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-C-NEXT: [[T:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-C-NEXT: [[F:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-C-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-C-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-C-NEXT: [[T_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[T]] to ptr
// CHECK-C-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
diff --git a/clang/test/Headers/openmp_new_nothrow.cpp b/clang/test/Headers/openmp_new_nothrow.cpp
index 1208387013547..d46ba46f327d8 100644
--- a/clang/test/Headers/openmp_new_nothrow.cpp
+++ b/clang/test/Headers/openmp_new_nothrow.cpp
@@ -22,8 +22,6 @@ extern const std::nothrow_t nothrow;
// AMDGPU-LABEL: define hidden noundef ptr @_Z17new_stuff_nothrowv
// AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
// AMDGPU-NEXT: entry:
-// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
-// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-NEXT: [[CALL:%.*]] = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 4, ptr noundef nonnull align 1 dereferenceable(1) addrspacecast (ptr addrspace(1) @nothrow to ptr)) #[[ATTR4:[0-9]+]]
// AMDGPU-NEXT: ret ptr [[CALL]]
//
@@ -52,16 +50,12 @@ int* new_stuff_nothrow() {
// AMDGPU-CXX03-LABEL: define hidden noundef ptr @_Z23new_array_stuff_nothrowv
// AMDGPU-CXX03-SAME: () #[[ATTR0]] {
// AMDGPU-CXX03-NEXT: entry:
-// AMDGPU-CXX03-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
-// AMDGPU-CXX03-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-CXX03-NEXT: [[CALL:%.*]] = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 136, ptr noundef nonnull align 1 dereferenceable(1) addrspacecast (ptr addrspace(1) @nothrow to ptr)) #[[ATTR4]]
// AMDGPU-CXX03-NEXT: ret ptr [[CALL]]
//
// AMDGPU-CXX11-LABEL: define hidden noundef ptr @_Z23new_array_stuff_nothrowv
// AMDGPU-CXX11-SAME: () #[[ATTR0]] {
// AMDGPU-CXX11-NEXT: entry:
-// AMDGPU-CXX11-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
-// AMDGPU-CXX11-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGPU-CXX11-NEXT: [[CALL:%.*]] = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 136, ptr noundef nonnull align 1 dereferenceable(1) addrspacecast (ptr addrspace(1) @nothrow to ptr)) #[[ATTR7:[0-9]+]]
// AMDGPU-CXX11-NEXT: ret ptr [[CALL]]
//
diff --git a/clang/test/OpenMP/amdgcn_weak_alias.c b/clang/test/OpenMP/amdgcn_weak_alias.c
index 33c7dc0041810..c647d7eb89f8c 100644
--- a/clang/test/OpenMP/amdgcn_weak_alias.c
+++ b/clang/test/OpenMP/amdgcn_weak_alias.c
@@ -26,6 +26,11 @@
// HOST: @Three_var = weak alias i32, ptr @__Three_var
// HOST: @Three_var_ = alias i32, ptr @__Three_var
//.
+// DEVICE: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace(1) constant i32 0
// DEVICE: @__Two_var = addrspace(1) global i32 2, align 4
// DEVICE: @__Three_var = addrspace(1) global i32 3, align 4
// DEVICE: @Two = weak hidden alias i32 (), ptr @__Two
@@ -61,8 +66,6 @@ extern int __attribute__((alias("__One_var"))) One_var_;
// DEVICE-LABEL: define hidden i32 @__Two(
// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: ret i32 2
//
int __Two(void) { return 2; }
@@ -83,8 +86,6 @@ extern int __attribute__((alias("__Two_var"))) Two_var_;
// DEVICE-LABEL: define hidden i32 @__Three(
// DEVICE-SAME: ) #[[ATTR0]] {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: ret i32 3
//
int __Three(void) { return 3; }
@@ -94,3 +95,24 @@ int Three(void) __attribute__ ((weak, alias("__Three")));
int Three_(void) __attribute__ ((alias("__Three")));
extern int __attribute__((weak, alias("__Three_var"))) Three_var;
extern int __attribute__((alias("__Three_var"))) Three_var_;
+//.
+// HOST: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// DEVICE: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"__Two_var", i32 0, i32 0}
+// HOST: [[META1:![0-9]+]] = !{i32 1, !"__Three_var", i32 0, i32 1}
+// HOST: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name}
+// HOST: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.1}
+// HOST: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// HOST: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"__Two_var", i32 0, i32 0}
+// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"__Three_var", i32 0, i32 1}
+// DEVICE: [[META2:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// DEVICE: [[META3:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// DEVICE: [[META4:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// DEVICE: [[META5:![0-9]+]] = !{i32 7, !"openmp-device", i32 51}
+// DEVICE: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/OpenMP/amdgcn_weak_alias.cpp b/clang/test/OpenMP/amdgcn_weak_alias.cpp
index d1ffff4b1ca01..b8b3c5877c9e8 100644
--- a/clang/test/OpenMP/amdgcn_weak_alias.cpp
+++ b/clang/test/OpenMP/amdgcn_weak_alias.cpp
@@ -17,6 +17,11 @@
// HOST: @_Z4Fourv = weak alias i32 (), ptr @_Z6__Fourv
// HOST: @_Z5Four_v = alias i32 (), ptr @_Z6__Fourv
//.
+// DEVICE: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace(1) constant i32 0
// DEVICE: @_Z3Twov = weak hidden alias i32 (), ptr @_Z5__Twov
// DEVICE: @_Z3Twof = weak hidden alias float (float), ptr @_Z5__Twof
// DEVICE: @_Z4Two_v = hidden alias i32 (), ptr @_Z5__Twov
@@ -54,8 +59,6 @@ float One_(float f) __attribute__((alias("_Z5__Onef")));
// DEVICE-LABEL: define hidden noundef i32 @_Z5__Twov(
// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: ret i32 2
//
int __Two(void) { return 2; }
@@ -71,9 +74,7 @@ int __Two(void) { return 2; }
// DEVICE-LABEL: define hidden noundef float @_Z5__Twof(
// DEVICE-SAME: float noundef [[F:%.*]]) #[[ATTR0]] {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// DEVICE-NEXT: [[F_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
// DEVICE-NEXT: store float [[F]], ptr [[F_ADDR_ASCAST]], align 4
// DEVICE-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR_ASCAST]], align 4
@@ -96,8 +97,6 @@ float Two_(float f) __attribute__((alias("_Z5__Twof")));
// DEVICE-LABEL: define linkonce_odr hidden noundef i32 @_Z7__Threev(
// DEVICE-SAME: ) #[[ATTR0]] comdat {
// DEVICE-NEXT: [[ENTRY:.*:]]
-// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// DEVICE-NEXT: ret i32 3
//
constexpr int __Three(void) { return 3; }
@@ -113,3 +112,18 @@ int Three_(void) __attribute__((alias("_Z7__Threev")));
constexpr int __Four(void) { return 4; }
int Four(void) __attribute__((weak, alias("_Z6__Fourv")));
int Four_(void) __attribute__((alias("_Z6__Fourv")));
+//.
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// HOST: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// DEVICE: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// DEVICE: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 51}
+// DEVICE: [[META4:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
More information about the cfe-commits
mailing list