[clang] 16b781e - [AMDGPU][clang] Fix __builtin_nontemporal_store() failure on AMDGPU

via cfe-commits cfe-commits at lists.llvm.org
Wed Dec 1 21:55:43 PST 2021


Author: skc7
Date: 2021-12-02T05:53:25Z
New Revision: 16b781e6d16dead414a7036c8b59f1700ea49251

URL: https://github.com/llvm/llvm-project/commit/16b781e6d16dead414a7036c8b59f1700ea49251
DIFF: https://github.com/llvm/llvm-project/commit/16b781e6d16dead414a7036c8b59f1700ea49251.diff

LOG: [AMDGPU][clang] Fix __builtin_nontemporal_store() failure on AMDGPU

Reviewed By: yaxunl, sameerds

Differential Revision: https://reviews.llvm.org/D114849

Added: 
    clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 849423c8b9bae..5d6df59cc4059 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -170,8 +170,9 @@ static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
 
   // Convert the type of the pointer to a pointer to the stored type.
   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
+  unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
   Value *BC = CGF.Builder.CreateBitCast(
-      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
+      Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
   LV.setNontemporal(true);
   CGF.EmitStoreOfScalar(Val, LV, false);

diff  --git a/clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl b/clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl
new file mode 100644
index 0000000000000..539d857080e27
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// CHECK-LABEL: @test_non_temporal_store_kernel
+// CHECK: store i32 0, i32 addrspace(1)* %{{.*}}, align 4, !tbaa !{{.*}}, !nontemporal {{.*}}
+
+kernel void test_non_temporal_store_kernel(global unsigned int* io) {
+  __builtin_nontemporal_store(0, io);
+}


        


More information about the cfe-commits mailing list