[llvm-branch-commits] [clang] release/20.x: Revert "Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Feb 19 01:18:03 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
This reverts commit 6e0b0038cd65ce726ce404305a06e1cf33e36cca.
This breaks the rocm-device-libs build, so it should not ship in the release.
---
Patch is 214.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127771.diff
21 Files Affected:
- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-3)
- (modified) clang/lib/CodeGen/CGBlocks.cpp (+1-2)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+2-9)
- (modified) clang/test/CodeGen/scoped-fence-ops.c (+120-61)
- (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+70-99)
- (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+16-20)
- (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+118-164)
- (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+220-275)
- (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14)
- (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (+4-7)
- (modified) clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl (+3-3)
- (modified) clang/test/CodeGenOpenCL/blocks.cl (+12-11)
- (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+4-428)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+56-87)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl (+12-18)
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl (+2-2)
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl (+1-1)
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl (+1-1)
- (modified) clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (+1-1)
- (modified) clang/test/CodeGenOpenCL/opencl_types.cl (+1-1)
- (modified) clang/test/Index/pipe-size.cl (+2-2)
``````````diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 0d308cb6af969..9ea366af56a52 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
TargetInfo::adjust(Diags, Opts);
// ToDo: There are still a few places using default address space as private
- // address space in OpenCL, which needs to be cleaned up, then the references
- // to OpenCL can be removed from the following line.
- setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
+ // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
+ // can be removed from the following line.
+ setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
!isAMDGCN(getTriple()));
}
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index a7584a95c8ca7..f38f86c792f69 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
DI->setLocation(D->getLocation());
DI->EmitDeclareOfBlockLiteralArgVariable(
*BlockInfo, D->getName(), argNum,
- cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
- Builder);
+ cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
}
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7ec9d59bfed5c..5237533364294 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
/*IndexTypeQuals=*/0);
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
llvm::Value *TmpPtr = Tmp.getPointer();
- // The EmitLifetime* pair expect a naked Alloca as their last argument,
- // however for cases where the default AS is not the Alloca AS, Tmp is
- // actually the Alloca ascasted to the default AS, hence the
- // stripPointerCasts()
- llvm::Value *Alloca = TmpPtr->stripPointerCasts();
llvm::Value *TmpSize = EmitLifetimeStart(
- CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
+ CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
@@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateAlignedStore(
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
}
- // Return the Alloca itself rather than a potential ascast as this is only
- // used by the paired EmitLifetimeEnd.
- return std::tie(ElemPtr, TmpSize, Alloca);
+ return std::tie(ElemPtr, TmpSize, TmpPtr);
};
// Could have events and/or varargs.
diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c
index d83ae05b0aea2..20cbb511a1758 100644
--- a/clang/test/CodeGen/scoped-fence-ops.c
+++ b/clang/test/CodeGen/scoped-fence-ops.c
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL12 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-linux-gnu -ffreestanding \
@@ -30,34 +30,62 @@ void fe1a() {
__scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
}
-// AMDGCN-LABEL: define hidden void @fe1b(
-// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
-// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
-// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
-// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]]
-// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]]
-// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]]
-// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]]
-// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]]
-// AMDGCN-NEXT: ]
-// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
-// AMDGCN-NEXT: ret void
-// AMDGCN: [[ACQUIRE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") acquire
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[RELEASE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[ACQREL]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") acq_rel
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SEQCST]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12-LABEL: define hidden void @fe1b(
+// AMDGCN-CL12-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL12-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL12-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
+// AMDGCN-CL12-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL12-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-CL12-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-CL12-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-CL12-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-CL12-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-CL12-NEXT: ]
+// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL12-NEXT: ret void
+// AMDGCN-CL12: [[ACQUIRE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acquire
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[RELEASE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[ACQREL]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acq_rel
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SEQCST]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+// AMDGCN-CL20-LABEL: define hidden void @fe1b(
+// AMDGCN-CL20-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL20-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL20-NEXT: store i32 [[ORD]], ptr addrspace(5) [[ORD_ADDR]], align 4
+// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[ORD_ADDR]], align 4
+// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL20-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-CL20-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-CL20-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-CL20-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-CL20-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-CL20-NEXT: ]
+// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL20-NEXT: ret void
+// AMDGCN-CL20: [[ACQUIRE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acquire
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[RELEASE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[ACQREL]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acq_rel
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SEQCST]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
//
// SPIRV-LABEL: define hidden spir_func void @fe1b(
// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
@@ -119,37 +147,68 @@ void fe1b(int ord) {
__scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
}
-// AMDGCN-LABEL: define hidden void @fe1c(
-// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
-// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
-// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
-// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
-// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
-// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
-// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
-// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
-// AMDGCN-NEXT: ]
-// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
-// AMDGCN-NEXT: ret void
-// AMDGCN: [[DEVICE_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("agent") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SYSTEM_SCOPE]]:
-// AMDGCN-NEXT: fence release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[WORKGROUP_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[WAVEFRONT_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("wavefront") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SINGLE_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("singlethread") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12-LABEL: define hidden void @fe1c(
+// AMDGCN-CL12-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL12-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL12-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
+// AMDGCN-CL12-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL12-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: ]
+// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL12-NEXT: ret void
+// AMDGCN-CL12: [[DEVICE_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("agent") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SYSTEM_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[WORKGROUP_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("wavefront") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SINGLE_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("singlethread") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+// AMDGCN-CL20-LABEL: define hidden void @fe1c(
+// AMDGCN-CL20-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL20-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL20-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL20-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: ]
+// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL20-NEXT: ret void
+// AMDGCN-CL20: [[DEVICE_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("agent") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SYSTEM_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[WORKGROUP_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("wavefront") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SINGLE_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("singlethread") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
//
// SPIRV-LABEL: define hidden spir_func void @fe1c(
// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 57d056b0ff9d5..7377b5bcbc347 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -69,11 +69,9 @@ struct LargeStructOneMember g_s;
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
-// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
-// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
-// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
+// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
+// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
// SPIR-LABEL: define dso_local spir_func void @foo(
@@ -152,22 +150,19 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
-// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
-// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
-// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
-// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
+// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
+// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
-// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
-// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
+// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
-// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
-// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
+// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
+// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @ker(
@@ -250,11 +245,10 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @foo_large(
-// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
+// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
-// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
-// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
+// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
+// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @foo_large(
@@ -325,18 +319,15 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
-// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// AMDGCN20-NEXT: [[TM...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/127771
More information about the llvm-branch-commits
mailing list