[clang] [llvm] [HLSL] Implement elementwise firstbitlow builtin (PR #116858)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 19 10:57:53 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-hlsl
Author: Ashley Coleman (V-FEXrt)
<details>
<summary>Changes</summary>
Closes https://github.com/llvm/llvm-project/issues/99116
- [x] Implement firstbitlow clang builtin,
- [x] Link firstbitlow clang builtin with hlsl_intrinsics.h
- [x] Add sema checks for firstbitlow to CheckHLSLBuiltinFunctionCall in SemaChecking.cpp
- [x] Add codegen for firstbitlow to EmitHLSLBuiltinExpr in CGBuiltin.cpp
- [x] Add codegen tests to clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
- [x] Add sema tests to clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
- [x] Create the int_dx_firstbitlow intrinsic in IntrinsicsDirectX.td
- [x] Create the DXILOpMapping of int_dx_firstbitlow to 32 in DXIL.td
- [x] Create the firstbitlow.ll and firstbitlow_errors.ll tests in llvm/test/CodeGen/DirectX/
- [x] Create the int_spv_firstbitlow intrinsic in IntrinsicsSPIRV.td
- [x] In SPIRVInstructionSelector.cpp create the firstbitlow lowering and map it to int_spv_firstbitlow in SPIRVInstructionSelector::selectIntrinsic.
- [x] Create SPIR-V backend test case in llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
---
Patch is 33.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116858.diff
16 Files Affected:
- (modified) clang/include/clang/Basic/Builtins.td (+6)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+8-1)
- (modified) clang/lib/CodeGen/CGHLSLRuntime.h (+1)
- (modified) clang/lib/Headers/hlsl/hlsl_intrinsics.h (+72)
- (modified) clang/lib/Sema/SemaHLSL.cpp (+2-1)
- (added) clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl (+153)
- (modified) clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl (+2-4)
- (added) clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl (+26)
- (modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+1)
- (modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+1)
- (modified) llvm/lib/Target/DirectX/DXIL.td (+12)
- (modified) llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp (+1)
- (modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+174)
- (added) llvm/test/CodeGen/DirectX/firstbitlow.ll (+47)
- (added) llvm/test/CodeGen/DirectX/firstbitlow_error.ll (+10)
- (added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll (+104)
``````````diff
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index e866605ac05c09..afe0a311f236d8 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4810,6 +4810,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_frac"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9e0c0bff0125c0..a65b96684f18eb 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18956,7 +18956,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
"hlsl.dot4add.u8packed");
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
-
Value *X = EmitScalarExpr(E->getArg(0));
return Builder.CreateIntrinsic(
@@ -18964,6 +18963,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
}
+ case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
+ Value *X = EmitScalarExpr(E->getArg(0));
+
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/ConvertType(E->getType()),
+ CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
+ nullptr, "hlsl.firstbitlow");
+ }
case Builtin::BI__builtin_hlsl_lerp: {
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 381a5959ec098e..c9eb1b08ff6ba6 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -96,6 +96,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow)
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 2ee3827d720495..610ddcea203d90 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1086,6 +1086,78 @@ uint3 firstbithigh(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(uint64_t4);
+//===----------------------------------------------------------------------===//
+// firstbitlow builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T firstbitlow(T Val)
+/// \brief Returns the location of the first set bit starting from the lowest
+/// order bit and working upward, per component.
+/// \param Val the input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int16_t4);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint64_t4);
+
//===----------------------------------------------------------------------===//
// floor builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 65b0d9cd65637f..c90ba9ae9e0f84 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1947,7 +1947,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
- case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
+ case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
+ case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
return true;
diff --git a/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl b/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
new file mode 100644
index 00000000000000..5d490fabc5bc8d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
@@ -0,0 +1,153 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s -DTARGET=spv
+
+#ifdef __HLSL_ENABLE_16_BIT
+// CHECK-LABEL: test_firstbitlow_ushort
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
+uint test_firstbitlow_ushort(uint16_t p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ushort2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
+uint2 test_firstbitlow_ushort2(uint16_t2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ushort3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
+uint3 test_firstbitlow_ushort3(uint16_t3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ushort4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
+uint4 test_firstbitlow_ushort4(uint16_t4 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_short
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
+uint test_firstbitlow_short(int16_t p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_short2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
+uint2 test_firstbitlow_short2(int16_t2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_short3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
+uint3 test_firstbitlow_short3(int16_t3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_short4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
+uint4 test_firstbitlow_short4(int16_t4 p0) {
+ return firstbitlow(p0);
+}
+#endif // __HLSL_ENABLE_16_BIT
+
+// CHECK-LABEL: test_firstbitlow_uint
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
+uint test_firstbitlow_uint(uint p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_uint2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
+uint2 test_firstbitlow_uint2(uint2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_uint3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
+uint3 test_firstbitlow_uint3(uint3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_uint4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
+uint4 test_firstbitlow_uint4(uint4 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ulong
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
+uint test_firstbitlow_ulong(uint64_t p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ulong2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
+uint2 test_firstbitlow_ulong2(uint64_t2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ulong3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
+uint3 test_firstbitlow_ulong3(uint64_t3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ulong4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
+uint4 test_firstbitlow_ulong4(uint64_t4 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_int
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
+uint test_firstbitlow_int(int p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_int2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
+uint2 test_firstbitlow_int2(int2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_int3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
+uint3 test_firstbitlow_int3(int3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_int4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
+uint4 test_firstbitlow_int4(int4 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_long
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
+uint test_firstbitlow_long(int64_t p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_long2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
+uint2 test_firstbitlow_long2(int64_t2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_long3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
+uint3 test_firstbitlow_long3(int64_t3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_long4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
+uint4 test_firstbitlow_long4(int64_t4 p0) {
+ return firstbitlow(p0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
index 1912ab3ae806b3..b4024418dbba4f 100644
--- a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
@@ -17,12 +17,10 @@ double test_int_builtin(double p0) {
double2 test_int_builtin_2(double2 p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
- // expected-error at -1 {{1st argument must be a vector of integers
- // (was 'double2' (aka 'vector<double, 2>'))}}
+ // expected-error at -1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
}
float test_int_builtin_3(float p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
- // expected-error at -1 {{1st argument must be a vector of integers
- // (was 'float')}}
+ // expected-error at -1 {{1st argument must be a vector of integers (was 'double')}}
}
diff --git a/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
new file mode 100644
index 00000000000000..95c25e9e2fb60d
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
+
+int test_too_few_arg() {
+ return firstbitlow();
+ // expected-error at -1 {{no matching function for call to 'firstbitlow'}}
+}
+
+int test_too_many_arg(int p0) {
+ return firstbitlow(p0, p0);
+ // expected-error at -1 {{no matching function for call to 'firstbitlow'}}
+}
+
+double test_int_builtin(double p0) {
+ return firstbitlow(p0);
+ // expected-error at -1 {{call to 'firstbitlow' is ambiguous}}
+}
+
+double2 test_int_builtin_2(double2 p0) {
+ return __builtin_hlsl_elementwise_firstbitlow(p0);
+ // expected-error at -1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
+}
+
+float test_int_builtin_3(float p0) {
+ return __builtin_hlsl_elementwise_firstbitlow(p0);
+ // expected-error at -1 {{1st argument must be a vector of integers (was 'double')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 6093664c908dc5..6148dcaff64470 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -103,4 +103,5 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
+def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index f29eb7ee22b2d2..63e99524511143 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -106,6 +106,7 @@ let TargetPrefix = "spv" in {
[IntrNoMem]>;
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
+ def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
// Read a value from the image buffer. It does not translate directly to a
// single OpImageRead because the result type is not necessarily a 4 element
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 078f0591a67515..efdd91c9c27f62 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -564,6 +564,18 @@ def CountBits : DXILOp<31, unaryBits> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def FirstbitLo : DXILOp<32, unaryBits> {
+ let Doc = "Returns the location of the first set bit starting from "
+ "the lowest order bit and working upward.";
+ let LLVMIntrinsic = int_dx_firstbitlow;
+ let arguments = [OverloadTy];
+ let result = Int32Ty;
+ let overloads =
+ [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def FirstbitHi : DXILOp<33, unaryBits> {
let Doc = "Returns the location of the first set bit starting from "
"the highest order bit and working downward.";
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index b0436a39423405..c37e0dbb82e5a0 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -34,6 +34,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_splitdouble:
case Intrinsic::dx_firstbituhigh:
case Intrinsic::dx_firstbitshigh:
+ case Intrinsic::dx_firstbitlow:
return true;
default:
return false;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 8a8835e0269200..c033f04305d0f0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -106,6 +106,18 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectFirstBitHigh64(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I, bool IsSigned) const;
+ bool selectFirstBitLow(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectFirstBitLow16(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectFirstBitLow32(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, Register SrcReg) const;
+
+ bool selectFirstBitLow64(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
bool selectGlobalValue(Register ResVReg, MachineInstr &I,
const MachineInstr *Init = nullptr) const;
@@ -2786,6 +2798,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false);
case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true);
+ case Intrinsic::spv_firstbitlow: // There is no CL equivlent of FindILsb
+ return selectFirstBitLow(ResVReg, ResType, I);
case Intrinsic::spv_group_memory_barrier_with_group_sync: {
bool Result = true;
auto MemSemConstant =
@@ -3158,6 +3172,166 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
}
}
+bool SPIRVInstructionSelector::selectFirstBitLow16(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ // OpUConvert treats the operand bits as an unsigned i16 and zero extends it
+ // to an unsigned i32. As this leaves all the least significant bits unchanged
+ // the first set bit from the LSB side doesn't change.
+ Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
+ bool Result = selectNAryOpWithSrcs(
+ ExtReg, ResType, I, {I.getOperand(2).getReg()}, SPIRV::OpUConvert);
+ return Result && selectFirstBitLow32(ResVReg, ResType, I, ExtReg);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitLow32(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ Register SrcReg) const {
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+ .addImm(GL::FindILsb)
+ .addUse(SrcReg)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ Register OpReg = I.getOperand(2).getReg();
+
+ // 1. Split int64 into 2 pieces using a bitcast
+ unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+ SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/116858
More information about the cfe-commits
mailing list