[clang] [llvm] [clang][OMPIRBuilder] Fix reduction codegen for SPIR-V (PR #162529)
Nick Sarnie via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 8 14:49:01 PDT 2025
https://github.com/sarnex updated https://github.com/llvm/llvm-project/pull/162529
>From 67ae789e8f20acfb2f36b270d475177c72d65688 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <nick.sarnie at intel.com>
Date: Wed, 8 Oct 2025 07:52:02 -0700
Subject: [PATCH 1/2] [OMPIRBuilder] Fix reduction codegen for SPIR-V
Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>
---
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 7 +++++-
clang/test/OpenMP/spirv_reduction.cpp | 22 +++++++++++++++++++
.../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 ++-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 11 ++++++----
4 files changed, 37 insertions(+), 6 deletions(-)
create mode 100644 clang/test/OpenMP/spirv_reduction.cpp
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 4272d8b1a1f51..3613b6a143d42 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -869,6 +869,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
CGM.getLangOpts().OpenMPOffloadMandatory,
/*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
+ Config.setDefaultTargetAS(
+ CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
OMPBuilder.setConfig(Config);
if (!CGM.getLangOpts().OpenMPIsTargetDevice)
@@ -1243,7 +1245,10 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
if (WFn)
ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
- llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy);
+ llvm::Type *FnPtrTy = llvm::PointerType::get(
+ CGF.getLLVMContext(), CGM.getDataLayout().getProgramAddressSpace());
+
+ llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, FnPtrTy);
// Create a private scope that will globalize the arguments
// passed from the outside of the target region.
diff --git a/clang/test/OpenMP/spirv_reduction.cpp b/clang/test/OpenMP/spirv_reduction.cpp
new file mode 100644
index 0000000000000..e0e7549de716c
--- /dev/null
+++ b/clang/test/OpenMP/spirv_reduction.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
+
+// expected-no-diagnostics
+
+// CHECK: call spir_func addrspace(9) void @__kmpc_parallel_51(ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}} to ptr addrspace(4)),
+// CHECK-SAME: i32 %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(4) {{.*}}, ptr addrspace(4) %{{.*}}, i64 {{.*}})
+
+// CHECK: call addrspace(9) i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}} to ptr addrspace(4)),
+// CHECK-SAME: ptr addrspace(4) %{{.*}}, i32 1024, i64 4, ptr addrspace(4) %{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}})
+
+int main() {
+ int matrix_sum = 0;
+ #pragma omp target teams distribute parallel for \
+ reduction(+:matrix_sum) \
+ map(tofrom:matrix_sum)
+ for (int i = 0; i < 100; i++) {
+
+ }
+
+ return 0;
+}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 01ca8da759ef7..1694a33510d79 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -42,6 +42,7 @@ __OMP_TYPE(Double)
OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx))
OMP_TYPE(Int63, Type::getIntNTy(Ctx, 63))
+OMP_TYPE(FuncPtrTy, PointerType::get(Ctx, M.getDataLayout().getProgramAddressSpace()))
__OMP_PTR_TYPE(VoidPtr)
__OMP_PTR_TYPE(VoidPtrPtr)
@@ -471,7 +472,7 @@ __OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr, KernelLaunchEn
__OMP_RTL(__kmpc_target_deinit, false, Void,)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
- VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
+ FuncPtrTy, VoidPtr, VoidPtrPtr, SizeTy)
__OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
__OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
__OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64, Int8)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 5980ee35a5cd2..286ed039b1214 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3623,7 +3623,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
// 1. Build a list of reduction variables.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
auto Size = ReductionInfos.size();
- Type *PtrTy = PointerType::getUnqual(Ctx);
+ Type *PtrTy = PointerType::get(Ctx, Config.getDefaultTargetAS());
+ Type *FuncPtrTy =
+ Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
Type *RedArrayTy = ArrayType::get(PtrTy, Size);
CodeGenIP = Builder.saveIP();
Builder.restoreIP(AllocaIP);
@@ -3667,9 +3669,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
Builder.getInt64(MaxDataSize * ReductionInfos.size());
if (!IsTeamsReduction) {
Value *SarFuncCast =
- Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy);
+ Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, FuncPtrTy);
Value *WcFuncCast =
- Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy);
+ Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
WcFuncCast};
Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
@@ -10072,13 +10074,14 @@ void OpenMPIRBuilder::initializeTypes(Module &M) {
LLVMContext &Ctx = M.getContext();
StructType *T;
unsigned DefaultTargetAS = Config.getDefaultTargetAS();
+ unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
- VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
+ VarName##Ptr = PointerType::get(Ctx, ProgramAS);
#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
T = StructType::getTypeByName(Ctx, StructName); \
if (!T) \
>From a8b2dd2469de9b9649b4b8615d9995b6231f6852 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <nick.sarnie at intel.com>
Date: Wed, 8 Oct 2025 14:48:30 -0700
Subject: [PATCH 2/2] use existing test
Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>
---
clang/test/OpenMP/reduction_complex.c | 17 ++++++++++++++---
clang/test/OpenMP/spirv_reduction.cpp | 22 ----------------------
2 files changed, 14 insertions(+), 25 deletions(-)
delete mode 100644 clang/test/OpenMP/spirv_reduction.cpp
diff --git a/clang/test/OpenMP/reduction_complex.c b/clang/test/OpenMP/reduction_complex.c
index e00caa8f90fdf..b79903ff4d37b 100644
--- a/clang/test/OpenMP/reduction_complex.c
+++ b/clang/test/OpenMP/reduction_complex.c
@@ -10,6 +10,17 @@
// RUN: -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
// RUN: -o - | FileCheck %s --check-prefix CHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN: -triple powerpc64le-unknown-unknown \
+// RUN: -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o \
+// RUN: %t-ppc-host-spv.bc
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN: -triple spirv64-intel -DCUA \
+// RUN: -fopenmp-targets=spirv64-intel -emit-llvm %s \
+// RUN: -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-spv.bc \
+// RUN: -o - | FileCheck %s --check-prefix CHECK
+
// expected-no-diagnostics
int foo() {
int i;
@@ -46,9 +57,9 @@ int foo() {
// CHECK-NEXT: %[[VAL_244:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_232]], i64 0, i64 0
// CHECK-NEXT: %[[VAL_245:.*]] = getelementptr { float, float }, ptr %[[VAL_243]], i64 1
// CHECK-NEXT: %[[VAL_246:.*]] = load i64, ptr %[[VAL_243]], align 8
-// CHECK-NEXT: %[[VAL_247:.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK-NEXT: %[[VAL_247:.*]] = call{{.*}}i32 @__kmpc_get_warp_size()
// CHECK-NEXT: %[[VAL_248:.*]] = trunc i32 %[[VAL_247]] to i16
-// CHECK-NEXT: %[[VAL_249:.*]] = call i64 @__kmpc_shuffle_int64(i64 %[[VAL_246]], i16 %[[VAL_240]], i16 %[[VAL_248]])
+// CHECK-NEXT: %[[VAL_249:.*]] = call{{.*}}i64 @__kmpc_shuffle_int64(i64 %[[VAL_246]], i16 %[[VAL_240]], i16 %[[VAL_248]])
// CHECK-NEXT: store i64 %[[VAL_249]], ptr %[[VAL_233]], align 8
// CHECK-NEXT: %[[VAL_250:.*]] = getelementptr i64, ptr %[[VAL_243]], i64 1
// CHECK-NEXT: %[[VAL_251:.*]] = getelementptr i64, ptr %[[VAL_233]], i64 1
@@ -67,7 +78,7 @@ int foo() {
// CHECK-NEXT: %[[VAL_263:.*]] = or i1 %[[VAL_262]], %[[VAL_261]]
// CHECK-NEXT: br i1 %[[VAL_263]], label %[[VAL_264:.*]], label %[[VAL_265:.*]]
// CHECK: then: ; preds = %[[VAL_266:.*]]
-// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l{{[0-9]+}}_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr %[[VAL_238]], ptr %[[VAL_232]]) #2
+// CHECK-NEXT: call{{.*}}void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l{{[0-9]+}}_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr %[[VAL_238]], ptr %[[VAL_232]]) #2
// CHECK-NEXT: br label %[[VAL_267:.*]]
// CHECK: else: ; preds = %[[VAL_266]]
// CHECK-NEXT: br label %[[VAL_267]]
diff --git a/clang/test/OpenMP/spirv_reduction.cpp b/clang/test/OpenMP/spirv_reduction.cpp
deleted file mode 100644
index e0e7549de716c..0000000000000
--- a/clang/test/OpenMP/spirv_reduction.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
-
-// expected-no-diagnostics
-
-// CHECK: call spir_func addrspace(9) void @__kmpc_parallel_51(ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}} to ptr addrspace(4)),
-// CHECK-SAME: i32 %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(4) {{.*}}, ptr addrspace(4) %{{.*}}, i64 {{.*}})
-
-// CHECK: call addrspace(9) i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}} to ptr addrspace(4)),
-// CHECK-SAME: ptr addrspace(4) %{{.*}}, i32 1024, i64 4, ptr addrspace(4) %{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}})
-
-int main() {
- int matrix_sum = 0;
- #pragma omp target teams distribute parallel for \
- reduction(+:matrix_sum) \
- map(tofrom:matrix_sum)
- for (int i = 0; i < 100; i++) {
-
- }
-
- return 0;
-}
More information about the llvm-commits
mailing list