[llvm] d59f7f2 - [clang][OMPIRBuilder] Fix reduction codegen for SPIR-V (#162529)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 9 07:25:40 PDT 2025


Author: Nick Sarnie
Date: 2025-10-09T14:25:36Z
New Revision: d59f7f2717b88c7a9764d466c026cef8833c6d1f

URL: https://github.com/llvm/llvm-project/commit/d59f7f2717b88c7a9764d466c026cef8833c6d1f
DIFF: https://github.com/llvm/llvm-project/commit/d59f7f2717b88c7a9764d466c026cef8833c6d1f.diff

LOG: [clang][OMPIRBuilder] Fix reduction codegen for SPIR-V (#162529)

When creating function pointers, make sure the pointer is in the program
address space.

Also fix a spot where I forgot to set `setDefaultTargetAS` and one spot
where we didn't use the default AS for a pointer.

---------

Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
    clang/test/OpenMP/reduction_complex.c
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 4272d8b1a1f51..3613b6a143d42 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -869,6 +869,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
       CGM.getLangOpts().OpenMPOffloadMandatory,
       /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
       hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
+  Config.setDefaultTargetAS(
+      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
   OMPBuilder.setConfig(Config);
 
   if (!CGM.getLangOpts().OpenMPIsTargetDevice)
@@ -1243,7 +1245,10 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
     llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
     if (WFn)
       ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
-    llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy);
+    llvm::Type *FnPtrTy = llvm::PointerType::get(
+        CGF.getLLVMContext(), CGM.getDataLayout().getProgramAddressSpace());
+
+    llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, FnPtrTy);
 
     // Create a private scope that will globalize the arguments
     // passed from the outside of the target region.

diff  --git a/clang/test/OpenMP/reduction_complex.c b/clang/test/OpenMP/reduction_complex.c
index e00caa8f90fdf..b79903ff4d37b 100644
--- a/clang/test/OpenMP/reduction_complex.c
+++ b/clang/test/OpenMP/reduction_complex.c
@@ -10,6 +10,17 @@
 // RUN:  -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
 // RUN:  -o - | FileCheck %s --check-prefix CHECK
 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple powerpc64le-unknown-unknown \
+// RUN:  -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o \
+// RUN:  %t-ppc-host-spv.bc
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple spirv64-intel -DCUA \
+// RUN:  -fopenmp-targets=spirv64-intel -emit-llvm %s \
+// RUN:  -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-spv.bc \
+// RUN:  -o - | FileCheck %s --check-prefix CHECK
+
 // expected-no-diagnostics
 int foo() {
   int i;
@@ -46,9 +57,9 @@ int foo() {
 // CHECK-NEXT:         %[[VAL_244:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_232]], i64 0, i64 0
 // CHECK-NEXT:         %[[VAL_245:.*]] = getelementptr { float, float }, ptr %[[VAL_243]], i64 1
 // CHECK-NEXT:         %[[VAL_246:.*]] = load i64, ptr %[[VAL_243]], align 8
-// CHECK-NEXT:         %[[VAL_247:.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK-NEXT:         %[[VAL_247:.*]] = call{{.*}}i32 @__kmpc_get_warp_size()
 // CHECK-NEXT:         %[[VAL_248:.*]] = trunc i32 %[[VAL_247]] to i16
-// CHECK-NEXT:         %[[VAL_249:.*]] = call i64 @__kmpc_shuffle_int64(i64 %[[VAL_246]], i16 %[[VAL_240]], i16 %[[VAL_248]])
+// CHECK-NEXT:         %[[VAL_249:.*]] = call{{.*}}i64 @__kmpc_shuffle_int64(i64 %[[VAL_246]], i16 %[[VAL_240]], i16 %[[VAL_248]])
 // CHECK-NEXT:         store i64 %[[VAL_249]], ptr %[[VAL_233]], align 8
 // CHECK-NEXT:         %[[VAL_250:.*]] = getelementptr i64, ptr %[[VAL_243]], i64 1
 // CHECK-NEXT:         %[[VAL_251:.*]] = getelementptr i64, ptr %[[VAL_233]], i64 1
@@ -67,7 +78,7 @@ int foo() {
 // CHECK-NEXT:         %[[VAL_263:.*]] = or i1 %[[VAL_262]], %[[VAL_261]]
 // CHECK-NEXT:         br i1 %[[VAL_263]], label %[[VAL_264:.*]], label %[[VAL_265:.*]]
 // CHECK:       then:                                             ; preds = %[[VAL_266:.*]]
-// CHECK-NEXT:         call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l{{[0-9]+}}_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr %[[VAL_238]], ptr %[[VAL_232]]) #2
+// CHECK-NEXT:         call{{.*}}void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l{{[0-9]+}}_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr %[[VAL_238]], ptr %[[VAL_232]]) #2
 // CHECK-NEXT:         br label %[[VAL_267:.*]]
 // CHECK:       else:                                             ; preds = %[[VAL_266]]
 // CHECK-NEXT:         br label %[[VAL_267]]

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 01ca8da759ef7..1694a33510d79 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -42,6 +42,7 @@ __OMP_TYPE(Double)
 
 OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx))
 OMP_TYPE(Int63, Type::getIntNTy(Ctx, 63))
+OMP_TYPE(FuncPtrTy, PointerType::get(Ctx, M.getDataLayout().getProgramAddressSpace()))
 
 __OMP_PTR_TYPE(VoidPtr)
 __OMP_PTR_TYPE(VoidPtrPtr)
@@ -471,7 +472,7 @@ __OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr, KernelLaunchEn
 __OMP_RTL(__kmpc_target_deinit, false, Void,)
 __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
 __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
-          VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
+          FuncPtrTy, VoidPtr, VoidPtrPtr, SizeTy)
 __OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
 __OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
 __OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64, Int8)

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 5980ee35a5cd2..286ed039b1214 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3623,7 +3623,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
   // 1. Build a list of reduction variables.
   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
   auto Size = ReductionInfos.size();
-  Type *PtrTy = PointerType::getUnqual(Ctx);
+  Type *PtrTy = PointerType::get(Ctx, Config.getDefaultTargetAS());
+  Type *FuncPtrTy =
+      Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
   Type *RedArrayTy = ArrayType::get(PtrTy, Size);
   CodeGenIP = Builder.saveIP();
   Builder.restoreIP(AllocaIP);
@@ -3667,9 +3669,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
       Builder.getInt64(MaxDataSize * ReductionInfos.size());
   if (!IsTeamsReduction) {
     Value *SarFuncCast =
-        Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy);
+        Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, FuncPtrTy);
     Value *WcFuncCast =
-        Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy);
+        Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
     Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
                      WcFuncCast};
     Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
@@ -10072,13 +10074,14 @@ void OpenMPIRBuilder::initializeTypes(Module &M) {
   LLVMContext &Ctx = M.getContext();
   StructType *T;
   unsigned DefaultTargetAS = Config.getDefaultTargetAS();
+  unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
   VarName##Ty = ArrayType::get(ElemTy, ArraySize);                             \
   VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
   VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg);            \
-  VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
+  VarName##Ptr = PointerType::get(Ctx, ProgramAS);
 #define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...)                      \
   T = StructType::getTypeByName(Ctx, StructName);                              \
   if (!T)                                                                      \


        


More information about the llvm-commits mailing list