[clang] [HLSL] Don't use CreateRuntimeFunction for intrinsics (PR #145334)
Nikita Popov via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 23 07:04:23 PDT 2025
https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/145334
>From 9ce18af49d38fcd5031ee4b432ab2f092818a045 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Mon, 23 Jun 2025 15:52:59 +0200
Subject: [PATCH] [HLSL] Don't use CreateRuntimeFunction for intrinsics
HLSL uses CreateRuntimeFunction for two intrinsics. This is pretty
weird thing to do, and doesn't match what the rest of the file
does.
I suspect this might be because these are convergent calls, but
the intrinsics themselves are already marked convergent, so it's
not necessary for clang to manually add the attribute.
---
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 37 +++++--------------
.../CodeGenHLSL/builtins/WaveActiveMax.hlsl | 6 +--
.../CodeGenHLSL/builtins/WaveActiveSum.hlsl | 6 +--
.../CodeGenHLSL/builtins/WaveReadLaneAt.hlsl | 12 +++---
4 files changed, 21 insertions(+), 40 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 2a60a0909c93e..58165185b6711 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -676,35 +676,23 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
case Builtin::BI__builtin_hlsl_wave_active_sum: {
// Due to the use of variadic arguments, explicitly retreive argument
Value *OpExpr = EmitScalarExpr(E->getArg(0));
- llvm::FunctionType *FT = llvm::FunctionType::get(
- OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
Intrinsic::ID IID = getWaveActiveSumIntrinsic(
getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
E->getArg(0)->getType());
- // Get overloaded name
- std::string Name =
- Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
- return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
- /*Local=*/false,
- /*AssumeConvergent=*/true),
+ return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+ &CGM.getModule(), IID, {OpExpr->getType()}),
ArrayRef{OpExpr}, "hlsl.wave.active.sum");
}
case Builtin::BI__builtin_hlsl_wave_active_max: {
// Due to the use of variadic arguments, explicitly retreive argument
Value *OpExpr = EmitScalarExpr(E->getArg(0));
- llvm::FunctionType *FT = llvm::FunctionType::get(
- OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
Intrinsic::ID IID = getWaveActiveMaxIntrinsic(
getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
E->getArg(0)->getType());
- // Get overloaded name
- std::string Name =
- Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
- return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
- /*Local=*/false,
- /*AssumeConvergent=*/true),
+ return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+ &CGM.getModule(), IID, {OpExpr->getType()}),
ArrayRef{OpExpr}, "hlsl.wave.active.max");
}
case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
@@ -739,18 +727,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
// create our function type.
Value *OpExpr = EmitScalarExpr(E->getArg(0));
Value *OpIndex = EmitScalarExpr(E->getArg(1));
- llvm::FunctionType *FT = llvm::FunctionType::get(
- OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
- false);
-
- // Get overloaded name
- std::string Name =
- Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
- ArrayRef{OpExpr->getType()}, &CGM.getModule());
- return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
- /*Local=*/false,
- /*AssumeConvergent=*/true),
- ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
+ return EmitRuntimeCall(
+ Intrinsic::getOrInsertDeclaration(
+ &CGM.getModule(), CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
+ {OpExpr->getType()}),
+ ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
}
case Builtin::BI__builtin_hlsl_elementwise_sign: {
auto *Arg0 = E->getArg(0);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl
index 7891cfc1989af..be05a17cc3692 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl
@@ -16,7 +16,7 @@ int test_int(int expr) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.max.i32([[TY]]) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.max.i32([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.max.i32([[TY]]) #[[#attr:]]
// CHECK-LABEL: test_uint64_t
uint64_t test_uint64_t(uint64_t expr) {
@@ -27,7 +27,7 @@ uint64_t test_uint64_t(uint64_t expr) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.umax.i64([[TY]]) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.umax.i64([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.umax.i64([[TY]]) #[[#attr:]]
// Test basic lowering to runtime function call with array and float value.
@@ -40,7 +40,7 @@ float4 test_floatv4(float4 expr) {
}
// CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]
-// CHECK-SPIRV: declare spir_func [[TY1]] @llvm.spv.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]
+// CHECK-SPIRV: declare [[TY1]] @llvm.spv.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]
// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl
index 4bf423ccc1b82..1fc93c62c8db0 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl
@@ -16,7 +16,7 @@ int test_int(int expr) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.sum.i32([[TY]]) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.sum.i32([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.sum.i32([[TY]]) #[[#attr:]]
// CHECK-LABEL: test_uint64_t
uint64_t test_uint64_t(uint64_t expr) {
@@ -27,7 +27,7 @@ uint64_t test_uint64_t(uint64_t expr) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.usum.i64([[TY]]) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.sum.i64([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.sum.i64([[TY]]) #[[#attr:]]
// Test basic lowering to runtime function call with array and float value.
@@ -40,6 +40,6 @@ float4 test_floatv4(float4 expr) {
}
// CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.reduce.sum.v4f32([[TY1]]) #[[#attr]]
-// CHECK-SPIRV: declare spir_func [[TY1]] @llvm.spv.wave.reduce.sum.v4f32([[TY1]]) #[[#attr]]
+// CHECK-SPIRV: declare [[TY1]] @llvm.spv.wave.reduce.sum.v4f32([[TY1]]) #[[#attr]]
// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
diff --git a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
index c94ef8a677350..8c787a42618ac 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
@@ -17,7 +17,7 @@ int test_int(int expr, uint idx) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.i32([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.i32([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.i32([[TY]], i32) #[[#attr:]]
// CHECK-LABEL: test_uint
uint test_uint(uint expr, uint idx) {
@@ -38,7 +38,7 @@ int64_t test_int64_t(int64_t expr, uint idx) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.i64([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.i64([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.i64([[TY]], i32) #[[#attr:]]
// CHECK-LABEL: test_uint64_t
uint64_t test_uint64_t(uint64_t expr, uint idx) {
@@ -60,7 +60,7 @@ int16_t test_int16(int16_t expr, uint idx) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.i16([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.i16([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.i16([[TY]], i32) #[[#attr:]]
// CHECK-LABEL: test_uint16
uint16_t test_uint16(uint16_t expr, uint idx) {
@@ -84,7 +84,7 @@ half test_half(half expr, uint idx) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.f16([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.f16([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.f16([[TY]], i32) #[[#attr:]]
// CHECK-LABEL: test_double
double test_double(double expr, uint idx) {
@@ -96,7 +96,7 @@ double test_double(double expr, uint idx) {
}
// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.f64([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.f64([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.f64([[TY]], i32) #[[#attr:]]
// CHECK-LABEL: test_floatv4
float4 test_floatv4(float4 expr, uint idx) {
@@ -108,6 +108,6 @@ float4 test_floatv4(float4 expr, uint idx) {
}
// CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.readlane.v4f32([[TY1]], i32) #[[#attr]]
-// CHECK-SPIRV: declare spir_func [[TY1]] @llvm.spv.wave.readlane.v4f32([[TY1]], i32) #[[#attr]]
+// CHECK-SPIRV: declare [[TY1]] @llvm.spv.wave.readlane.v4f32([[TY1]], i32) #[[#attr]]
// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
More information about the cfe-commits
mailing list