[clang] [llvm] [HLSL] Implement elementwise firstbitlow builtin (PR #116858)
Ashley Coleman via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 17 14:25:58 PST 2024
https://github.com/V-FEXrt updated https://github.com/llvm/llvm-project/pull/116858
From a63e05d2e090edf7834fb62296bccd071a8e38b8 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Thu, 14 Nov 2024 11:53:39 -0700
Subject: [PATCH 01/11] [HLSL] Implement elementwise firstbitlow builtin
---
clang/include/clang/Basic/Builtins.td | 6 +
clang/lib/CodeGen/CGBuiltin.cpp | 9 +-
clang/lib/CodeGen/CGHLSLRuntime.h | 1 +
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 72 ++++++++
clang/lib/Sema/SemaHLSL.cpp | 3 +-
.../CodeGenHLSL/builtins/firstbitlow.hlsl | 153 ++++++++++++++++
.../BuiltIns/firstbithigh-errors.hlsl | 6 +-
.../SemaHLSL/BuiltIns/firstbitlow-errors.hlsl | 26 +++
llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 +
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 +
llvm/lib/Target/DirectX/DXIL.td | 13 ++
.../DirectX/DirectXTargetTransformInfo.cpp | 1 +
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 169 ++++++++++++++++++
llvm/test/CodeGen/DirectX/firstbitlow.ll | 47 +++++
.../test/CodeGen/DirectX/firstbitlow_error.ll | 10 ++
.../SPIRV/hlsl-intrinsics/firstbitlow.ll | 104 +++++++++++
16 files changed, 616 insertions(+), 6 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
create mode 100644 llvm/test/CodeGen/DirectX/firstbitlow.ll
create mode 100644 llvm/test/CodeGen/DirectX/firstbitlow_error.ll
create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 32a09e2ceb3857..a4fb671e479307 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4834,6 +4834,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_frac"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c2e983eebebc10..cbd4c931b05b05 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19255,7 +19255,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
"hlsl.dot4add.u8packed");
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
-
Value *X = EmitScalarExpr(E->getArg(0));
return Builder.CreateIntrinsic(
@@ -19263,6 +19262,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
}
+ case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
+ Value *X = EmitScalarExpr(E->getArg(0));
+
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/ConvertType(E->getType()),
+ CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
+ nullptr, "hlsl.firstbitlow");
+ }
case Builtin::BI__builtin_hlsl_lerp: {
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index bb120c8b5e9e60..df285e185173dc 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -97,6 +97,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow)
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 1126e13600f8af..c132c300da27a4 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1121,6 +1121,78 @@ uint3 firstbithigh(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(uint64_t4);
+//===----------------------------------------------------------------------===//
+// firstbitlow builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T firstbitlow(T Val)
+/// \brief Returns the location of the first set bit starting from the lowest
+/// order bit and working upward, per component.
+/// \param Val the input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int16_t4);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint64_t4);
+
//===----------------------------------------------------------------------===//
// floor builtins
//===----------------------------------------------------------------------===//
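As a quick reference for the header additions above: firstbitlow maps each component to the zero-based index of its lowest set bit. Below is a minimal C++ model of the per-component behavior (illustrative sketch only, not part of the patch; it assumes the all-ones "no bit set" result that the SPIR-V lowering later in this patch compares against).

  #include <array>
  #include <bit>
  #include <cstdint>

  // Per-component model of firstbitlow for a uint4-like value. 0xFFFFFFFF
  // marks "no bit set" (assumed convention, consistent with the -1 checks in
  // the SPIR-V 64-bit lowering further down in this patch).
  std::array<uint32_t, 4> firstbitlowRef(std::array<uint32_t, 4> V) {
    std::array<uint32_t, 4> R{};
    for (std::size_t I = 0; I < 4; ++I)
      R[I] = V[I] == 0 ? 0xFFFFFFFFu
                       : static_cast<uint32_t>(std::countr_zero(V[I]));
    return R;
  }
  // e.g. {8, 1, 6, 0} maps to {3, 0, 1, 0xFFFFFFFF}.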
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 88db3e12541193..bf74c62aa8f50f 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2014,7 +2014,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
- case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
+ case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
+ case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
return true;
diff --git a/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl b/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
new file mode 100644
index 00000000000000..5d490fabc5bc8d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
@@ -0,0 +1,153 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s -DTARGET=spv
+
+#ifdef __HLSL_ENABLE_16_BIT
+// CHECK-LABEL: test_firstbitlow_ushort
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
+uint test_firstbitlow_ushort(uint16_t p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ushort2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
+uint2 test_firstbitlow_ushort2(uint16_t2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ushort3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
+uint3 test_firstbitlow_ushort3(uint16_t3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ushort4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
+uint4 test_firstbitlow_ushort4(uint16_t4 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_short
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
+uint test_firstbitlow_short(int16_t p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_short2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
+uint2 test_firstbitlow_short2(int16_t2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_short3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
+uint3 test_firstbitlow_short3(int16_t3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_short4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
+uint4 test_firstbitlow_short4(int16_t4 p0) {
+ return firstbitlow(p0);
+}
+#endif // __HLSL_ENABLE_16_BIT
+
+// CHECK-LABEL: test_firstbitlow_uint
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
+uint test_firstbitlow_uint(uint p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_uint2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
+uint2 test_firstbitlow_uint2(uint2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_uint3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
+uint3 test_firstbitlow_uint3(uint3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_uint4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
+uint4 test_firstbitlow_uint4(uint4 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ulong
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
+uint test_firstbitlow_ulong(uint64_t p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ulong2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
+uint2 test_firstbitlow_ulong2(uint64_t2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ulong3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
+uint3 test_firstbitlow_ulong3(uint64_t3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_ulong4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
+uint4 test_firstbitlow_ulong4(uint64_t4 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_int
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
+uint test_firstbitlow_int(int p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_int2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
+uint2 test_firstbitlow_int2(int2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_int3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
+uint3 test_firstbitlow_int3(int3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_int4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
+uint4 test_firstbitlow_int4(int4 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_long
+// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
+uint test_firstbitlow_long(int64_t p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_long2
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
+uint2 test_firstbitlow_long2(int64_t2 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_long3
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
+uint3 test_firstbitlow_long3(int64_t3 p0) {
+ return firstbitlow(p0);
+}
+
+// CHECK-LABEL: test_firstbitlow_long4
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
+uint4 test_firstbitlow_long4(int64_t4 p0) {
+ return firstbitlow(p0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
index 1912ab3ae806b3..b4024418dbba4f 100644
--- a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
@@ -17,12 +17,10 @@ double test_int_builtin(double p0) {
double2 test_int_builtin_2(double2 p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
- // expected-error@-1 {{1st argument must be a vector of integers
- // (was 'double2' (aka 'vector<double, 2>'))}}
+ // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
}
float test_int_builtin_3(float p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
- // expected-error@-1 {{1st argument must be a vector of integers
- // (was 'float')}}
+ // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
}
diff --git a/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
new file mode 100644
index 00000000000000..95c25e9e2fb60d
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
+
+int test_too_few_arg() {
+ return firstbitlow();
+ // expected-error@-1 {{no matching function for call to 'firstbitlow'}}
+}
+
+int test_too_many_arg(int p0) {
+ return firstbitlow(p0, p0);
+ // expected-error@-1 {{no matching function for call to 'firstbitlow'}}
+}
+
+double test_int_builtin(double p0) {
+ return firstbitlow(p0);
+ // expected-error@-1 {{call to 'firstbitlow' is ambiguous}}
+}
+
+double2 test_int_builtin_2(double2 p0) {
+ return __builtin_hlsl_elementwise_firstbitlow(p0);
+ // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
+}
+
+float test_int_builtin_3(float p0) {
+ return __builtin_hlsl_elementwise_firstbitlow(p0);
+ // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 5696345a617fe5..1a182250b610bb 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -110,6 +110,7 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
+def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 1ae3129774e507..1b8dfc416441ac 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -109,6 +109,7 @@ let TargetPrefix = "spv" in {
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
+ def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_spv_bufferUpdateCounter
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index cff6cdce813ded..a208ba7663a3b4 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -618,6 +618,19 @@ def CountBits : DXILOp<31, unaryBits> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def FirstbitLo : DXILOp<32, unaryBits> {
+ let Doc = "Returns the location of the first set bit starting from "
+ "the lowest order bit and working upward.";
+ let LLVMIntrinsic = int_dx_firstbitlow;
+ let arguments = [OverloadTy];
+ let result = Int32Ty;
+ let overloads =
+ [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ // TODO: check these
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def FirstbitHi : DXILOp<33, unaryBits> {
let Doc = "Returns the location of the first set bit starting from "
"the highest order bit and working downward.";
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 2ca4e23594d56d..0c0d324b21cddd 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -45,6 +45,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_splitdouble:
case Intrinsic::dx_firstbituhigh:
case Intrinsic::dx_firstbitshigh:
+ case Intrinsic::dx_firstbitlow:
return true;
default:
return false;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 3a98b74b3d6757..fe8879a6991047 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -106,6 +106,18 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectFirstBitHigh64(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I, bool IsSigned) const;
+ bool selectFirstBitLow(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectFirstBitLow16(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectFirstBitLow32(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, Register SrcReg) const;
+
+ bool selectFirstBitLow64(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
bool selectGlobalValue(Register ResVReg, MachineInstr &I,
const MachineInstr *Init = nullptr) const;
@@ -2895,6 +2907,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false);
case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true);
+ case Intrinsic::spv_firstbitlow: // There is no CL equivalent of FindILsb
+ // (true?)
+ return selectFirstBitLow(ResVReg, ResType, I);
case Intrinsic::spv_group_memory_barrier_with_group_sync: {
bool Result = true;
auto MemSemConstant =
@@ -3292,6 +3307,160 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
}
}
+bool SPIRVInstructionSelector::selectFirstBitLow16(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ // OpUConvert treats the operand bits as an unsigned i16 and zero extends it
+ // to an unsigned i32. As this leaves all the least significant bits unchanged
+ // the first set bit from the LSB side doesn't change.
+ Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
+ bool Result = selectNAryOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()},
+ SPIRV::OpUConvert);
+ return Result && selectFirstBitLow32(ResVReg, ResType, I, ExtReg);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitLow32(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ Register SrcReg) const {
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+ .addImm(GL::FindILsb)
+ .addUse(SrcReg)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ Register OpReg = I.getOperand(2).getReg();
+
+ // 1. Split int64 into 2 pieces using a bitcast
+ unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+ SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+ MachineIRBuilder MIRBuilder(I);
+ SPIRVType *PostCastType =
+ GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder);
+ Register BitcastReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
+ bool Result =
+ selectUnOpWithSrc(BitcastReg, PostCastType, I, OpReg, SPIRV::OpBitcast);
+
+ // 2. Find the first set bit from the LSB side for all the pieces in #1
+ Register FBLReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
+ Result = Result && selectFirstBitLow32(FBLReg, PostCastType, I, BitcastReg);
+
+ // 3. Split result vector into high bits and low bits
+ Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
+ Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
+
+ bool ZeroAsNull = STI.isOpenCLEnv();
+ bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
+ if (IsScalarRes) {
+ // if scalar do a vector extract
+ Result = Result && selectNAryOpWithSrcs(
+ HighReg, ResType, I,
+ {FBLReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)},
+ SPIRV::OpVectorExtractDynamic);
+ Result = Result && selectNAryOpWithSrcs(
+ LowReg, ResType, I,
+ {FBLReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)},
+ SPIRV::OpVectorExtractDynamic);
+ } else {
+ // if vector do a shufflevector
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SPIRV::OpVectorShuffle))
+ .addDef(HighReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(FBLReg)
+ // Per the spec, repeat the vector if only one vec is needed
+ .addUse(FBLReg);
+
+ // high bits are store in even indexes. Extract them from FBLReg
+ for (unsigned j = 0; j < ComponentCount * 2; j += 2) {
+ MIB.addImm(j);
+ }
+ Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SPIRV::OpVectorShuffle))
+ .addDef(LowReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(FBLReg)
+ // Per the spec, repeat the vector if only one vec is needed
+ .addUse(FBLReg);
+
+ // low bits are store in odd indexes. Extract them from FBLReg
+ for (unsigned j = 1; j < ComponentCount * 2; j += 2) {
+ MIB.addImm(j);
+ }
+ Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+ }
+
+ // 4. Check if result of each bottom 32 bits is == -1
+ SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII);
+ Register NegOneReg;
+ Register Reg0;
+ Register Reg32;
+ unsigned SelectOp;
+ unsigned AddOp;
+
+ if (IsScalarRes) {
+ NegOneReg =
+ GR.getOrCreateConstInt((unsigned)-1, I, ResType, TII, ZeroAsNull);
+ Reg0 = GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull);
+ Reg32 = GR.getOrCreateConstInt(32, I, ResType, TII, ZeroAsNull);
+ SelectOp = SPIRV::OpSelectSISCond;
+ AddOp = SPIRV::OpIAddS;
+ } else {
+ BoolType = GR.getOrCreateSPIRVVectorType(BoolType, ComponentCount, MIRBuilder);
+ NegOneReg =
+ GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull);
+ Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull);
+ Reg32 = GR.getOrCreateConstVector(32, I, ResType, TII, ZeroAsNull);
+ SelectOp = SPIRV::OpSelectVIVCond;
+ AddOp = SPIRV::OpIAddV;
+ }
+
+ // Check if the low bits are == -1; true if -1
+ Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType));
+ Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I, {LowReg, NegOneReg},
+ SPIRV::OpIEqual);
+
+ // Select high bits if true in BReg, otherwise low bits
+ Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
+ Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I, {BReg, HighReg, LowReg},
+ SelectOp);
+
+ // Add 32 for high bits, 0 for low bits
+ Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
+ Result = Result &&
+ selectNAryOpWithSrcs(ValReg, ResType, I, {BReg, Reg32, Reg0}, SelectOp);
+
+ return Result &&
+ selectNAryOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ // FindILsb intrinsic only supports 32 bit integers
+ Register OpReg = I.getOperand(2).getReg();
+ SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg);
+
+ switch (GR.getScalarOrVectorBitWidth(OpType)) {
+ case 16:
+ return selectFirstBitLow16(ResVReg, ResType, I);
+ case 32:
+ return selectFirstBitLow32(ResVReg, ResType, I, OpReg);
+ case 64:
+ return selectFirstBitLow64(ResVReg, ResType, I);
+ default:
+ report_fatal_error("spv_firstbitlow only supports 16,32,64 bits.");
+ }
+}
+
bool SPIRVInstructionSelector::selectAllocaArray(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
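The 64-bit path in selectFirstBitLow64 is the least obvious part of the selector, so here is a compact C++ sketch of the same idea for a nonzero scalar uint64_t (illustrative only, not part of the patch): split the value into two 32-bit halves, run FindILsb on each, and prefer the low half unless its result is -1.

  #include <bit>
  #include <cstdint>

  // Stand-in for GLSL.std.450 FindILsb on one 32-bit word: index of the
  // lowest set bit, or 0xFFFFFFFF (-1) when the word is zero.
  static uint32_t findILsb32(uint32_t W) {
    return W == 0 ? 0xFFFFFFFFu : static_cast<uint32_t>(std::countr_zero(W));
  }

  // Mirrors the scalar case of selectFirstBitLow64 for a nonzero input:
  // bitcast into two i32 halves, FindILsb on both, then select and add.
  uint32_t firstBitLow64(uint64_t V) {
    uint32_t Low = findILsb32(static_cast<uint32_t>(V));
    uint32_t High = findILsb32(static_cast<uint32_t>(V >> 32));
    bool UseHigh = (Low == 0xFFFFFFFFu);  // OpIEqual against -1
    uint32_t Bits = UseHigh ? High : Low; // OpSelect of the per-half results
    uint32_t Offset = UseHigh ? 32u : 0u; // OpSelect of the 32/0 offset
    return Offset + Bits;                 // OpIAdd
  }

The vector case follows the same steps lane-wise, using OpVectorShuffle to separate the words the patch treats as the high (even indexes) and low (odd indexes) halves.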
diff --git a/llvm/test/CodeGen/DirectX/firstbitlow.ll b/llvm/test/CodeGen/DirectX/firstbitlow.ll
new file mode 100644
index 00000000000000..884ec1164fc992
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/firstbitlow.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure dxil operation function calls for firstbitlow are generated for all integer types.
+
+define noundef i32 @test_firstbitlow_short(i16 noundef %a) {
+entry:
+; CHECK: call i32 @dx.op.unaryBits.i16(i32 32, i16 %{{.*}})
+ %elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i16(i16 %a)
+ ret i32 %elt.firstbitlow
+}
+
+define noundef i32 @test_firstbitlow_int(i32 noundef %a) {
+entry:
+; CHECK: call i32 @dx.op.unaryBits.i32(i32 32, i32 %{{.*}})
+ %elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i32(i32 %a)
+ ret i32 %elt.firstbitlow
+}
+
+define noundef i32 @test_firstbitlow_long(i64 noundef %a) {
+entry:
+; CHECK: call i32 @dx.op.unaryBits.i64(i32 32, i64 %{{.*}})
+ %elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i64(i64 %a)
+ ret i32 %elt.firstbitlow
+}
+
+define noundef <4 x i32> @test_firstbitlow_vec4_i32(<4 x i32> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee3]])
+ ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3
+ %2 = call <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32> %a)
+ ret <4 x i32> %2
+}
+
+declare i32 @llvm.dx.firstbitlow.i16(i16)
+declare i32 @llvm.dx.firstbitlow.i32(i32)
+declare i32 @llvm.dx.firstbitlow.i64(i64)
+declare <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/DirectX/firstbitlow_error.ll b/llvm/test/CodeGen/DirectX/firstbitlow_error.ll
new file mode 100644
index 00000000000000..d8b9333067f4ac
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/firstbitlow_error.ll
@@ -0,0 +1,10 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation firstbitlow does not support double overload type
+; CHECK: invalid intrinsic signature
+
+define noundef double @firstbitlow_double(double noundef %a) {
+entry:
+ %1 = call double @llvm.dx.firstbitlow.f64(double %a)
+ ret double %1
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
new file mode 100644
index 00000000000000..9ebd8cc511eb6c
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
@@ -0,0 +1,104 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: [[glsl_450_ext:%.+]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: OpMemoryModel Logical GLSL450
+; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0
+; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2
+; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4
+; CHECK-DAG: [[const_zero:%.*]] = OpConstant [[u32_t]] 0
+; CHECK-DAG: [[const_zerox2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_zero]] [[const_zero]]
+; CHECK-DAG: [[const_one:%.*]] = OpConstant [[u32_t]] 1
+; CHECK-DAG: [[const_thirty_two:%.*]] = OpConstant [[u32_t]] 32
+; CHECK-DAG: [[const_thirty_twox2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_thirty_two]] [[const_thirty_two]]
+; CHECK-DAG: [[const_neg_one:%.*]] = OpConstant [[u32_t]] 4294967295
+; CHECK-DAG: [[const_neg_onex2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg_one]] [[const_neg_one]]
+; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0
+; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2
+; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0
+; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2
+; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool
+; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector [[bool_t]] 2
+
+; CHECK-LABEL: Begin function firstbitlow_i32
+define noundef i32 @firstbitlow_i32(i32 noundef %a) {
+entry:
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindILsb [[a]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i32(i32 %a)
+ ret i32 %elt.firstbitlow
+}
+
+; CHECK-LABEL: Begin function firstbitlow_2xi32
+define noundef <2 x i32> @firstbitlow_2xi32(<2 x i32> noundef %a) {
+entry:
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i32(<2 x i32> %a)
+ ret <2 x i32> %elt.firstbitlow
+}
+
+; CHECK-LABEL: Begin function firstbitlow_i16
+define noundef i32 @firstbitlow_i16(i16 noundef %a) {
+entry:
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]]
+; CHECK: [[a32:%.+]] = OpUConvert [[u32_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindILsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i16(i16 %a)
+ ret i32 %elt.firstbitlow
+}
+
+; CHECK-LABEL: Begin function firstbitlow_v2i16
+define noundef <2 x i32> @firstbitlow_v2i16(<2 x i16> noundef %a) {
+entry:
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x2_t]]
+; CHECK: [[a32:%.+]] = OpUConvert [[u32x2_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i16(<2 x i16> %a)
+ ret <2 x i32> %elt.firstbitlow
+}
+
+; CHECK-LABEL: Begin function firstbitlow_i64
+define noundef i32 @firstbitlow_i64(i64 noundef %a) {
+entry:
+; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]]
+; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]]
+; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a32x2]]
+; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_zero]]
+; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_one]]
+; CHECK: [[should_use_high:%.+]] = OpIEqual [[bool_t]] [[low_bits]] [[const_neg_one]]
+; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[high_bits]] [[low_bits]]
+; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[const_thirty_two]] [[const_zero]]
+; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i64(i64 %a)
+ ret i32 %elt.firstbitlow
+}
+
+; CHECK-LABEL: Begin function firstbitlow_v2i64
+define noundef <2 x i32> @firstbitlow_v2i64(<2 x i64> noundef %a) {
+entry:
+; CHECK: [[a64x2:%.+]] = OpFunctionParameter [[u64x2_t]]
+; CHECK: [[a32x4:%.+]] = OpBitcast [[u32x4_t]] [[a64x2]]
+; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a32x4]]
+; CHECK: [[high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 0 2
+; CHECK: [[low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 1 3
+; CHECK: [[should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[low_bits]] [[const_neg_onex2]]
+; CHECK: [[ans_bits:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[high_bits]] [[low_bits]]
+; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[const_thirty_twox2]] [[const_zerox2]]
+; CHECK: [[ret:%.+]] = OpIAdd [[u32x2_t]] [[ans_offset]] [[ans_bits]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i64(<2 x i64> %a)
+ ret <2 x i32> %elt.firstbitlow
+}
+
+;declare i16 @llvm.spv.firstbitlow.i16(i16)
+;declare i32 @llvm.spv.firstbitlow.i32(i32)
+;declare i64 @llvm.spv.firstbitlow.i64(i64)
+;declare i16 @llvm.spv.firstbitlow.v2i16(<2 x i16>)
+;declare i32 @llvm.spv.firstbitlow.v2i32(<2 x i32>)
+;declare i64 @llvm.spv.firstbitlow.v2i64(<2 x i64>)
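The firstbitlow_i16 and firstbitlow_v2i16 tests above pin down the OpUConvert-then-FindILsb sequence from selectFirstBitLow16. A small C++ check of why zero-extending first is safe (illustrative only, not part of the patch): the extension leaves the low 16 bits untouched, so the index of the lowest set bit does not move.

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    // Exhaustively verify every nonzero 16-bit value: the lowest set bit has
    // the same index before and after zero-extension (the OpUConvert step).
    for (uint32_t V = 1; V <= 0xFFFFu; ++V) {
      uint16_t Narrow = static_cast<uint16_t>(V);
      uint32_t Wide = Narrow; // zero-extension to 32 bits
      assert(std::countr_zero(Narrow) == std::countr_zero(Wide));
    }
    return 0;
  }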
From 72f1999234cfa5de5bf3e46da46225a5b1e87924 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Tue, 19 Nov 2024 10:35:52 -0700
Subject: [PATCH 02/11] format
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 42 +++++++++++--------
1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index fe8879a6991047..dd00947f985496 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3314,8 +3314,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow16(Register ResVReg,
// to an unsigned i32. As this leaves all the least significant bits unchanged
// the first set bit from the LSB side doesn't change.
Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- bool Result = selectNAryOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()},
- SPIRV::OpUConvert);
+ bool Result = selectNAryOpWithSrcs(
+ ExtReg, ResType, I, {I.getOperand(2).getReg()}, SPIRV::OpUConvert);
return Result && selectFirstBitLow32(ResVReg, ResType, I, ExtReg);
}
@@ -3343,7 +3343,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
MachineIRBuilder MIRBuilder(I);
SPIRVType *PostCastType =
GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder);
- Register BitcastReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
+ Register BitcastReg =
+ MRI->createVirtualRegister(GR.getRegClass(PostCastType));
bool Result =
selectUnOpWithSrc(BitcastReg, PostCastType, I, OpReg, SPIRV::OpBitcast);
@@ -3359,14 +3360,18 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
if (IsScalarRes) {
// if scalar do a vector extract
- Result = Result && selectNAryOpWithSrcs(
- HighReg, ResType, I,
- {FBLReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)},
- SPIRV::OpVectorExtractDynamic);
- Result = Result && selectNAryOpWithSrcs(
- LowReg, ResType, I,
- {FBLReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)},
- SPIRV::OpVectorExtractDynamic);
+ Result =
+ Result &&
+ selectNAryOpWithSrcs(
+ HighReg, ResType, I,
+ {FBLReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)},
+ SPIRV::OpVectorExtractDynamic);
+ Result =
+ Result &&
+ selectNAryOpWithSrcs(
+ LowReg, ResType, I,
+ {FBLReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)},
+ SPIRV::OpVectorExtractDynamic);
} else {
// if vector do a shufflevector
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
@@ -3414,7 +3419,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
SelectOp = SPIRV::OpSelectSISCond;
AddOp = SPIRV::OpIAddS;
} else {
- BoolType = GR.getOrCreateSPIRVVectorType(BoolType, ComponentCount, MIRBuilder);
+ BoolType =
+ GR.getOrCreateSPIRVVectorType(BoolType, ComponentCount, MIRBuilder);
NegOneReg =
GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull);
Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull);
@@ -3425,18 +3431,18 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
// Check if the low bits are == -1; true if -1
Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType));
- Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I, {LowReg, NegOneReg},
- SPIRV::OpIEqual);
+ Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I,
+ {LowReg, NegOneReg}, SPIRV::OpIEqual);
// Select high bits if true in BReg, otherwise low bits
Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I, {BReg, HighReg, LowReg},
- SelectOp);
+ Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I,
+ {BReg, HighReg, LowReg}, SelectOp);
// Add 32 for high bits, 0 for low bits
Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Result = Result &&
- selectNAryOpWithSrcs(ValReg, ResType, I, {BReg, Reg32, Reg0}, SelectOp);
+ Result = Result && selectNAryOpWithSrcs(ValReg, ResType, I,
+ {BReg, Reg32, Reg0}, SelectOp);
return Result &&
selectNAryOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp);
From 8434e6ad8590baa3848192728433a0ad9fe02f4b Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Tue, 19 Nov 2024 11:56:07 -0700
Subject: [PATCH 03/11] cleanup
---
llvm/lib/Target/DirectX/DXIL.td | 1 -
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 11 ++++----
.../SPIRV/hlsl-intrinsics/firstbitlow.ll | 26 +++++++++----------
3 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index a208ba7663a3b4..d6d78581bafbf7 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -627,7 +627,6 @@ def FirstbitLo : DXILOp<32, unaryBits> {
let overloads =
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
- // TODO: check these
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index dd00947f985496..e1c58f85785542 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2908,7 +2908,6 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true);
case Intrinsic::spv_firstbitlow: // There is no CL equivalent of FindILsb
- // (true?)
return selectFirstBitLow(ResVReg, ResType, I);
case Intrinsic::spv_group_memory_barrier_with_group_sync: {
bool Result = true;
@@ -3382,7 +3381,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
// Per the spec, repeat the vector if only one vec is needed
.addUse(FBLReg);
- // high bits are store in even indexes. Extract them from FBLReg
+ // high bits are stored in even indexes. Extract them from FBLReg
for (unsigned j = 0; j < ComponentCount * 2; j += 2) {
MIB.addImm(j);
}
@@ -3396,14 +3395,14 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
// Per the spec, repeat the vector if only one vec is needed
.addUse(FBLReg);
- // low bits are store in odd indexes. Extract them from FBLReg
+ // low bits are stored in odd indexes. Extract them from FBLReg
for (unsigned j = 1; j < ComponentCount * 2; j += 2) {
MIB.addImm(j);
}
Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
}
- // 4. Check if result of each bottom 32 bits is == -1
+ // 4. Check the result. When low bits == -1 use high, otherwise use low
SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII);
Register NegOneReg;
Register Reg0;
@@ -3429,7 +3428,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
AddOp = SPIRV::OpIAddV;
}
- // Check if the low bits are == -1; true if -1
+ // Check if the low bits are == -1
Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType));
Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I,
{LowReg, NegOneReg}, SPIRV::OpIEqual);
@@ -3439,7 +3438,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I,
{BReg, HighReg, LowReg}, SelectOp);
- // Add 32 for high bits, 0 for low bits
+ // 5. Add 32 when high bits are used, otherwise 0 for low bits
Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
Result = Result && selectNAryOpWithSrcs(ValReg, ResType, I,
{BReg, Reg32, Reg0}, SelectOp);
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
index 9ebd8cc511eb6c..05488479e5bd0f 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
@@ -6,13 +6,13 @@
; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0
; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2
; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4
-; CHECK-DAG: [[const_zero:%.*]] = OpConstant [[u32_t]] 0
-; CHECK-DAG: [[const_zerox2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_zero]] [[const_zero]]
-; CHECK-DAG: [[const_one:%.*]] = OpConstant [[u32_t]] 1
-; CHECK-DAG: [[const_thirty_two:%.*]] = OpConstant [[u32_t]] 32
-; CHECK-DAG: [[const_thirty_twox2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_thirty_two]] [[const_thirty_two]]
-; CHECK-DAG: [[const_neg_one:%.*]] = OpConstant [[u32_t]] 4294967295
-; CHECK-DAG: [[const_neg_onex2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg_one]] [[const_neg_one]]
+; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0
+; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]]
+; CHECK-DAG: [[const_1:%.*]] = OpConstant [[u32_t]] 1
+; CHECK-DAG: [[const_32:%.*]] = OpConstant [[u32_t]] 32
+; CHECK-DAG: [[const_32x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_32]] [[const_32]]
+; CHECK-DAG: [[const_neg1:%.*]] = OpConstant [[u32_t]] 4294967295
+; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]]
; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0
; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2
; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0
@@ -68,11 +68,11 @@ entry:
; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]]
; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]]
; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a32x2]]
-; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_zero]]
-; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_one]]
-; CHECK: [[should_use_high:%.+]] = OpIEqual [[bool_t]] [[low_bits]] [[const_neg_one]]
+; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]]
+; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]]
+; CHECK: [[should_use_high:%.+]] = OpIEqual [[bool_t]] [[low_bits]] [[const_neg1]]
; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[high_bits]] [[low_bits]]
-; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[const_thirty_two]] [[const_zero]]
+; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[const_32]] [[const_0]]
; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]]
; CHECK: OpReturnValue [[ret]]
%elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i64(i64 %a)
@@ -87,9 +87,9 @@ entry:
; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a32x4]]
; CHECK: [[high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 0 2
; CHECK: [[low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 1 3
-; CHECK: [[should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[low_bits]] [[const_neg_onex2]]
+; CHECK: [[should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[low_bits]] [[const_neg1x2]]
; CHECK: [[ans_bits:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[high_bits]] [[low_bits]]
-; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[const_thirty_twox2]] [[const_zerox2]]
+; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[const_32x2]] [[const_0x2]]
; CHECK: [[ret:%.+]] = OpIAdd [[u32x2_t]] [[ans_offset]] [[ans_bits]]
; CHECK: OpReturnValue [[ret]]
%elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i64(<2 x i64> %a)
From b6bdc0dffb05163dedb5e5e82bf8b9f079298225 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Fri, 22 Nov 2024 13:19:43 -0700
Subject: [PATCH 04/11] Address comments
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 327 ++++++------------
1 file changed, 108 insertions(+), 219 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index e1c58f85785542..bca67585d2858d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -96,27 +96,20 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectFirstBitHigh(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I, bool IsSigned) const;
- bool selectFirstBitHigh16(Register ResVReg, const SPIRVType *ResType,
- MachineInstr &I, bool IsSigned) const;
-
- bool selectFirstBitHigh32(Register ResVReg, const SPIRVType *ResType,
- MachineInstr &I, Register SrcReg,
- bool IsSigned) const;
-
- bool selectFirstBitHigh64(Register ResVReg, const SPIRVType *ResType,
- MachineInstr &I, bool IsSigned) const;
-
bool selectFirstBitLow(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
- bool selectFirstBitLow16(Register ResVReg, const SPIRVType *ResType,
- MachineInstr &I) const;
+ bool selectFirstBitSet16(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, unsigned ExtendOpcode,
+ unsigned BitSetOpcode) const;
- bool selectFirstBitLow32(Register ResVReg, const SPIRVType *ResType,
- MachineInstr &I, Register SrcReg) const;
+ bool selectFirstBitSet32(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, Register SrcReg,
+ unsigned Opcode) const;
- bool selectFirstBitLow64(Register ResVReg, const SPIRVType *ResType,
- MachineInstr &I) const;
+ bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, unsigned ExtendOpcode,
+ unsigned BitSetOpcode, bool SwapPrimarySide) const;
bool selectGlobalValue(Register ResVReg, MachineInstr &I,
const MachineInstr *Init = nullptr) const;
@@ -3153,187 +3146,34 @@ Register SPIRVInstructionSelector::buildPointerToResource(
return AcReg;
}
-bool SPIRVInstructionSelector::selectFirstBitHigh16(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- bool IsSigned) const {
- unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert;
- // zero or sign extend
+bool SPIRVInstructionSelector::selectFirstBitSet16(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ unsigned ExtendOpcode, unsigned BitSetOpcode) const {
Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- bool Result =
- selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()}, Opcode);
- return Result && selectFirstBitHigh32(ResVReg, ResType, I, ExtReg, IsSigned);
-}
-
-bool SPIRVInstructionSelector::selectFirstBitHigh32(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- Register SrcReg,
- bool IsSigned) const {
- unsigned Opcode = IsSigned ? GL::FindSMsb : GL::FindUMsb;
- return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
- .addImm(Opcode)
- .addUse(SrcReg)
- .constrainAllUses(TII, TRI, RBI);
-}
-
-bool SPIRVInstructionSelector::selectFirstBitHigh64(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- bool IsSigned) const {
- Register OpReg = I.getOperand(2).getReg();
- // 1. split our int64 into 2 pieces using a bitcast
- unsigned count = GR.getScalarOrVectorComponentCount(ResType);
- SPIRVType *baseType = GR.retrieveScalarOrVectorIntType(ResType);
- MachineIRBuilder MIRBuilder(I);
- SPIRVType *postCastT =
- GR.getOrCreateSPIRVVectorType(baseType, 2 * count, MIRBuilder);
- Register bitcastReg = MRI->createVirtualRegister(GR.getRegClass(postCastT));
- bool Result =
- selectOpWithSrcs(bitcastReg, postCastT, I, {OpReg}, SPIRV::OpBitcast);
-
- // 2. call firstbithigh
- Register FBHReg = MRI->createVirtualRegister(GR.getRegClass(postCastT));
- Result &= selectFirstBitHigh32(FBHReg, postCastT, I, bitcastReg, IsSigned);
-
- // 3. split result vector into high bits and low bits
- Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
-
- bool ZeroAsNull = STI.isOpenCLEnv();
- bool isScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
- if (isScalarRes) {
- // if scalar do a vector extract
- Result &= selectOpWithSrcs(
- HighReg, ResType, I,
- {FBHReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)},
- SPIRV::OpVectorExtractDynamic);
- Result &= selectOpWithSrcs(
- LowReg, ResType, I,
- {FBHReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)},
- SPIRV::OpVectorExtractDynamic);
- } else { // vector case do a shufflevector
- auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(SPIRV::OpVectorShuffle))
- .addDef(HighReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(FBHReg)
- .addUse(FBHReg);
- // ^^ this vector will not be selected from; could be empty
- unsigned j;
- for (j = 0; j < count * 2; j += 2) {
- MIB.addImm(j);
- }
- Result &= MIB.constrainAllUses(TII, TRI, RBI);
-
- // get low bits
- MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(SPIRV::OpVectorShuffle))
- .addDef(LowReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(FBHReg)
- .addUse(FBHReg);
- // ^^ this vector will not be selected from; could be empty
- for (j = 1; j < count * 2; j += 2) {
- MIB.addImm(j);
- }
- Result &= MIB.constrainAllUses(TII, TRI, RBI);
- }
-
- // 4. check if result of each top 32 bits is == -1
- SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII);
- Register NegOneReg;
- Register Reg0;
- Register Reg32;
- unsigned selectOp;
- unsigned addOp;
- if (isScalarRes) {
- NegOneReg =
- GR.getOrCreateConstInt((unsigned)-1, I, ResType, TII, ZeroAsNull);
- Reg0 = GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull);
- Reg32 = GR.getOrCreateConstInt(32, I, ResType, TII, ZeroAsNull);
- selectOp = SPIRV::OpSelectSISCond;
- addOp = SPIRV::OpIAddS;
- } else {
- BoolType = GR.getOrCreateSPIRVVectorType(BoolType, count, MIRBuilder);
- NegOneReg =
- GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull);
- Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull);
- Reg32 = GR.getOrCreateConstVector(32, I, ResType, TII, ZeroAsNull);
- selectOp = SPIRV::OpSelectVIVCond;
- addOp = SPIRV::OpIAddV;
- }
-
- // check if the high bits are == -1; true if -1
- Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType));
- Result &= selectOpWithSrcs(BReg, BoolType, I, {HighReg, NegOneReg},
- SPIRV::OpIEqual);
-
- // Select low bits if true in BReg, otherwise high bits
- Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Result &=
- selectOpWithSrcs(TmpReg, ResType, I, {BReg, LowReg, HighReg}, selectOp);
-
- // Add 32 for high bits, 0 for low bits
- Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Result &= selectOpWithSrcs(ValReg, ResType, I, {BReg, Reg0, Reg32}, selectOp);
+ bool Result = selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()},
+ ExtendOpcode);
return Result &&
- selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, addOp);
-}
-
-bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- bool IsSigned) const {
- // FindUMsb and FindSMsb intrinsics only support 32 bit integers
- Register OpReg = I.getOperand(2).getReg();
- SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg);
-
- switch (GR.getScalarOrVectorBitWidth(OpType)) {
- case 16:
- return selectFirstBitHigh16(ResVReg, ResType, I, IsSigned);
- case 32:
- return selectFirstBitHigh32(ResVReg, ResType, I, OpReg, IsSigned);
- case 64:
- return selectFirstBitHigh64(ResVReg, ResType, I, IsSigned);
- default:
- report_fatal_error(
- "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits.");
- }
+ selectFirstBitSet32(ResVReg, ResType, I, ExtReg, BitSetOpcode);
}
-bool SPIRVInstructionSelector::selectFirstBitLow16(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I) const {
- // OpUConvert treats the operand bits as an unsigned i16 and zero extends it
- // to an unsigned i32. As this leaves all the least significant bits unchanged
- // the first set bit from the LSB side doesn't change.
- Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- bool Result = selectNAryOpWithSrcs(
- ExtReg, ResType, I, {I.getOperand(2).getReg()}, SPIRV::OpUConvert);
- return Result && selectFirstBitLow32(ResVReg, ResType, I, ExtReg);
-}
-
-bool SPIRVInstructionSelector::selectFirstBitLow32(Register ResVReg,
+bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I,
- Register SrcReg) const {
+ Register SrcReg,
+ unsigned Opcode) const {
return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
- .addImm(GL::FindILsb)
+ .addImm(Opcode)
.addUse(SrcReg)
.constrainAllUses(TII, TRI, RBI);
}
-bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I) const {
+bool SPIRVInstructionSelector::selectFirstBitSet64(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ unsigned ExtendOpcode, unsigned BitSetOpcode, bool SwapPrimarySide) const {
Register OpReg = I.getOperand(2).getReg();
// 1. Split int64 into 2 pieces using a bitcast
@@ -3345,11 +3185,12 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
Register BitcastReg =
MRI->createVirtualRegister(GR.getRegClass(PostCastType));
bool Result =
- selectUnOpWithSrc(BitcastReg, PostCastType, I, OpReg, SPIRV::OpBitcast);
+ selectOpWithSrcs(BitcastReg, PostCastType, I, {OpReg}, SPIRV::OpBitcast);
- // 2. Find the first set bit from the LSB side for all the pieces in #1
- Register FBLReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
- Result = Result && selectFirstBitLow32(FBLReg, PostCastType, I, BitcastReg);
+ // 2. Find the first set bit from the primary side for all the pieces in #1
+ Register FBPReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
+ Result = Result && selectFirstBitSet32(FBPReg, PostCastType, I, BitcastReg,
+ BitSetOpcode);
// 3. Split result vector into high bits and low bits
Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
@@ -3359,31 +3200,29 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
if (IsScalarRes) {
// if scalar do a vector extract
- Result =
- Result &&
- selectNAryOpWithSrcs(
- HighReg, ResType, I,
- {FBLReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)},
- SPIRV::OpVectorExtractDynamic);
- Result =
- Result &&
- selectNAryOpWithSrcs(
- LowReg, ResType, I,
- {FBLReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)},
- SPIRV::OpVectorExtractDynamic);
+ Result = Result &&
+ selectOpWithSrcs(HighReg, ResType, I,
+ {FBPReg, GR.getOrCreateConstInt(0, I, ResType,
+ TII, ZeroAsNull)},
+ SPIRV::OpVectorExtractDynamic);
+ Result = Result &&
+ selectOpWithSrcs(LowReg, ResType, I,
+ {FBPReg, GR.getOrCreateConstInt(1, I, ResType,
+ TII, ZeroAsNull)},
+ SPIRV::OpVectorExtractDynamic);
} else {
// if vector do a shufflevector
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(SPIRV::OpVectorShuffle))
.addDef(HighReg)
.addUse(GR.getSPIRVTypeID(ResType))
- .addUse(FBLReg)
+ .addUse(FBPReg)
// Per the spec, repeat the vector if only one vec is needed
- .addUse(FBLReg);
+ .addUse(FBPReg);
// high bits are stored in even indexes. Extract them from FBLReg
- for (unsigned j = 0; j < ComponentCount * 2; j += 2) {
- MIB.addImm(j);
+ for (unsigned J = 0; J < ComponentCount * 2; J += 2) {
+ MIB.addImm(J);
}
Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
@@ -3391,18 +3230,19 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
TII.get(SPIRV::OpVectorShuffle))
.addDef(LowReg)
.addUse(GR.getSPIRVTypeID(ResType))
- .addUse(FBLReg)
+ .addUse(FBPReg)
// Per the spec, repeat the vector if only one vec is needed
- .addUse(FBLReg);
+ .addUse(FBPReg);
// low bits are stored in odd indexes. Extract them from FBLReg
- for (unsigned j = 1; j < ComponentCount * 2; j += 2) {
- MIB.addImm(j);
+ for (unsigned J = 1; J < ComponentCount * 2; J += 2) {
+ MIB.addImm(J);
}
Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
}
- // 4. Check the result. When low bits == -1 use high, otherwise use low
+ // 4. Check the result. When primary bits == -1 use secondary, otherwise use
+ // primary
SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII);
Register NegOneReg;
Register Reg0;
@@ -3428,23 +3268,66 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg,
AddOp = SPIRV::OpIAddV;
}
- // Check if the low bits are == -1
+ Register PrimaryReg;
+ Register SecondaryReg;
+ Register PrimaryShiftReg;
+ Register SecondaryShiftReg;
+ if (SwapPrimarySide) {
+ PrimaryReg = LowReg;
+ SecondaryReg = HighReg;
+ PrimaryShiftReg = Reg0;
+ SecondaryShiftReg = Reg32;
+ } else {
+ PrimaryReg = HighReg;
+ SecondaryReg = LowReg;
+ PrimaryShiftReg = Reg32;
+ SecondaryShiftReg = Reg0;
+ }
+
+ // Check if the primary bits are == -1
Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType));
- Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I,
- {LowReg, NegOneReg}, SPIRV::OpIEqual);
+ Result = Result && selectOpWithSrcs(BReg, BoolType, I,
+ {PrimaryReg, NegOneReg}, SPIRV::OpIEqual);
- // Select high bits if true in BReg, otherwise low bits
+ // Select secondary bits if true in BReg, otherwise primary bits
Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I,
- {BReg, HighReg, LowReg}, SelectOp);
+ Result =
+ Result && selectOpWithSrcs(TmpReg, ResType, I,
+ {BReg, SecondaryReg, PrimaryReg}, SelectOp);
// 5. Add 32 when high bits are used, otherwise 0 for low bits
Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Result = Result && selectNAryOpWithSrcs(ValReg, ResType, I,
- {BReg, Reg32, Reg0}, SelectOp);
+ Result = Result && selectOpWithSrcs(
+ ValReg, ResType, I,
+ {BReg, SecondaryShiftReg, PrimaryShiftReg}, SelectOp);
return Result &&
- selectNAryOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp);
+ selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ bool IsSigned) const {
+ // FindUMsb and FindSMsb intrinsics only support 32 bit integers
+ Register OpReg = I.getOperand(2).getReg();
+ SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg);
+ // zero or sign extend
+ unsigned ExtendOpcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert;
+ unsigned BitSetOpcode = IsSigned ? GL::FindSMsb : GL::FindUMsb;
+
+ switch (GR.getScalarOrVectorBitWidth(OpType)) {
+ case 16:
+ return selectFirstBitSet16(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode);
+ case 32:
+ return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
+ case 64:
+ return selectFirstBitSet64(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode,
+ /*SwapPrimarySide=*/false);
+ default:
+ report_fatal_error(
+ "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits.");
+ }
}
bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
@@ -3453,14 +3336,20 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
// FindILsb intrinsic only supports 32 bit integers
Register OpReg = I.getOperand(2).getReg();
SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg);
+ // OpUConvert treats the operand bits as an unsigned i16 and zero extends it
+  // to an unsigned i32. Since this leaves all the least significant bits
+  // unchanged, the first set bit from the LSB side doesn't change.
+ unsigned ExtendOpcode = SPIRV::OpUConvert;
+ unsigned BitSetOpcode = GL::FindILsb;
switch (GR.getScalarOrVectorBitWidth(OpType)) {
case 16:
- return selectFirstBitLow16(ResVReg, ResType, I);
+ return selectFirstBitSet16(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode);
case 32:
- return selectFirstBitLow32(ResVReg, ResType, I, OpReg);
+ return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
case 64:
- return selectFirstBitLow64(ResVReg, ResType, I);
+ return selectFirstBitSet64(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode,
+ /*SwapPrimarySide=*/true);
default:
report_fatal_error("spv_firstbitlow only supports 16,32,64 bits.");
}
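For reference, the 64-bit lowering added above reduces to the following scalar logic. This is a minimal, illustrative C++ sketch rather than selector code; findLsb32 stands in for the GLSL.std.450 FindILsb result and both helper names are made up for this example.

#include <cstdint>

// Stand-in for FindILsb on a 32-bit value: index of the lowest set bit,
// or 0xFFFFFFFF when no bit is set.
static uint32_t findLsb32(uint32_t V) {
  for (uint32_t I = 0; I < 32; ++I)
    if (V & (1u << I))
      return I;
  return 0xFFFFFFFFu;
}

// Mirrors steps 1-5 above for the SwapPrimarySide == true (firstbitlow) case:
// split the i64 into two i32 pieces, find the first set bit in each, prefer
// the low piece, and add 32 when the high piece has to be used instead.
static uint32_t firstBitLow64(uint64_t V) {
  uint32_t Low = findLsb32(static_cast<uint32_t>(V));
  uint32_t High = findLsb32(static_cast<uint32_t>(V >> 32));
  bool UseHigh = (Low == 0xFFFFFFFFu); // low piece had no set bit
  uint32_t Bits = UseHigh ? High : Low;
  uint32_t Offset = UseHigh ? 32u : 0u;
  return Offset + Bits;
}

For example, firstBitLow64(0x0000000100000000) takes the high path and yields 32, i.e. FindILsb(1) plus the offset of 32.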
>From 3c74bfe7d2835aded89dabbd2cc07cda9a987a7a Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Fri, 22 Nov 2024 13:40:45 -0700
Subject: [PATCH 05/11] cleanup
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 36 ++++++++++---------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index bca67585d2858d..cb5e7c6be3573e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -108,8 +108,8 @@ class SPIRVInstructionSelector : public InstructionSelector {
unsigned Opcode) const;
bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType,
- MachineInstr &I, unsigned ExtendOpcode,
- unsigned BitSetOpcode, bool SwapPrimarySide) const;
+ MachineInstr &I, unsigned BitSetOpcode,
+ bool SwapPrimarySide) const;
bool selectGlobalValue(Register ResVReg, MachineInstr &I,
const MachineInstr *Init = nullptr) const;
@@ -3171,9 +3171,11 @@ bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg,
.constrainAllUses(TII, TRI, RBI);
}
-bool SPIRVInstructionSelector::selectFirstBitSet64(
- Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
- unsigned ExtendOpcode, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+bool SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ unsigned BitSetOpcode,
+ bool SwapPrimarySide) const {
Register OpReg = I.getOperand(2).getReg();
// 1. Split int64 into 2 pieces using a bitcast
@@ -3188,8 +3190,8 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
selectOpWithSrcs(BitcastReg, PostCastType, I, {OpReg}, SPIRV::OpBitcast);
// 2. Find the first set bit from the primary side for all the pieces in #1
- Register FBPReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
- Result = Result && selectFirstBitSet32(FBPReg, PostCastType, I, BitcastReg,
+ Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
+ Result = Result && selectFirstBitSet32(FBSReg, PostCastType, I, BitcastReg,
BitSetOpcode);
// 3. Split result vector into high bits and low bits
@@ -3202,12 +3204,12 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
// if scalar do a vector extract
Result = Result &&
selectOpWithSrcs(HighReg, ResType, I,
- {FBPReg, GR.getOrCreateConstInt(0, I, ResType,
+ {FBSReg, GR.getOrCreateConstInt(0, I, ResType,
TII, ZeroAsNull)},
SPIRV::OpVectorExtractDynamic);
Result = Result &&
selectOpWithSrcs(LowReg, ResType, I,
- {FBPReg, GR.getOrCreateConstInt(1, I, ResType,
+ {FBSReg, GR.getOrCreateConstInt(1, I, ResType,
TII, ZeroAsNull)},
SPIRV::OpVectorExtractDynamic);
} else {
@@ -3216,11 +3218,11 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
TII.get(SPIRV::OpVectorShuffle))
.addDef(HighReg)
.addUse(GR.getSPIRVTypeID(ResType))
- .addUse(FBPReg)
+ .addUse(FBSReg)
// Per the spec, repeat the vector if only one vec is needed
- .addUse(FBPReg);
+ .addUse(FBSReg);
- // high bits are stored in even indexes. Extract them from FBLReg
+ // high bits are stored in even indexes. Extract them from FBSReg
for (unsigned J = 0; J < ComponentCount * 2; J += 2) {
MIB.addImm(J);
}
@@ -3230,11 +3232,11 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
TII.get(SPIRV::OpVectorShuffle))
.addDef(LowReg)
.addUse(GR.getSPIRVTypeID(ResType))
- .addUse(FBPReg)
+ .addUse(FBSReg)
// Per the spec, repeat the vector if only one vec is needed
- .addUse(FBPReg);
+ .addUse(FBSReg);
- // low bits are stored in odd indexes. Extract them from FBLReg
+ // low bits are stored in odd indexes. Extract them from FBSReg
for (unsigned J = 1; J < ComponentCount * 2; J += 2) {
MIB.addImm(J);
}
@@ -3322,7 +3324,7 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
case 32:
return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
case 64:
- return selectFirstBitSet64(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode,
+ return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode,
/*SwapPrimarySide=*/false);
default:
report_fatal_error(
@@ -3348,7 +3350,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
case 32:
return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
case 64:
- return selectFirstBitSet64(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode,
+ return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode,
/*SwapPrimarySide=*/true);
default:
report_fatal_error("spv_firstbitlow only supports 16,32,64 bits.");
>From a90026c858f8db3b9f1bcb2b45d764d255672c99 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Fri, 22 Nov 2024 16:58:29 -0700
Subject: [PATCH 06/11] Divide vectors that surpass 4 element limit
---
llvm/lib/Target/DirectX/DXIL.td | 2 +-
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 136 ++++++++++++++----
.../SPIRV/hlsl-intrinsics/firstbitlow.ll | 119 ++++++++++++++-
3 files changed, 230 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index d6d78581bafbf7..367009d7f92e67 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -621,7 +621,7 @@ def CountBits : DXILOp<31, unaryBits> {
def FirstbitLo : DXILOp<32, unaryBits> {
let Doc = "Returns the location of the first set bit starting from "
"the lowest order bit and working upward.";
- let LLVMIntrinsic = int_dx_firstbitlow;
+ let intrinsics = [ IntrinSelect<int_dx_firstbitlow> ];
let arguments = [OverloadTy];
let result = Int32Ty;
let overloads =
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index cb5e7c6be3573e..b2115528b8dcb4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -108,8 +108,8 @@ class SPIRVInstructionSelector : public InstructionSelector {
unsigned Opcode) const;
bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType,
- MachineInstr &I, unsigned BitSetOpcode,
- bool SwapPrimarySide) const;
+ MachineInstr &I, Register SrcReg,
+ unsigned BitSetOpcode, bool SwapPrimarySide) const;
bool selectGlobalValue(Register ResVReg, MachineInstr &I,
const MachineInstr *Init = nullptr) const;
@@ -3171,23 +3171,116 @@ bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg,
.constrainAllUses(TII, TRI, RBI);
}
-bool SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- unsigned BitSetOpcode,
- bool SwapPrimarySide) const {
- Register OpReg = I.getOperand(2).getReg();
-
- // 1. Split int64 into 2 pieces using a bitcast
+bool SPIRVInstructionSelector::selectFirstBitSet64(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+ bool ZeroAsNull = STI.isOpenCLEnv();
+ Register ConstIntZero =
+ GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
+ Register ConstIntOne =
+ GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
+
+ // SPIRV doesn't support vectors with more than 4 components. Since the
+  // algorithm below converts i64 -> i32x2 and i64x4 -> i32x8, it can only
+  // operate on vectors with 2 or fewer components. When larger vectors are
+  // seen, split them, recurse, then recombine them.
+ if (ComponentCount > 2) {
+ unsigned LeftComponentCount = ComponentCount / 2;
+ unsigned RightComponentCount = ComponentCount - LeftComponentCount;
+ bool LeftIsVector = LeftComponentCount > 1;
+
+ // Split the SrcReg in half into 2 smaller vec registers
+ // (ie i64x4 -> i64x2, i64x2)
+ MachineIRBuilder MIRBuilder(I);
+ SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
+ SPIRVType *LeftVecOpType;
+ SPIRVType *LeftVecResType;
+ if (LeftIsVector) {
+ LeftVecOpType =
+ GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
+ LeftVecResType = GR.getOrCreateSPIRVVectorType(
+ BaseType, LeftComponentCount, MIRBuilder);
+ } else {
+ LeftVecOpType = OpType;
+ LeftVecResType = BaseType;
+ }
+
+ SPIRVType *RightVecOpType =
+ GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder);
+ SPIRVType *RightVecResType = GR.getOrCreateSPIRVVectorType(
+ BaseType, RightComponentCount, MIRBuilder);
+
+ Register LeftSideIn =
+ MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType));
+ Register RightSideIn =
+ MRI->createVirtualRegister(GR.getRegClass(RightVecOpType));
+
+ bool Result;
+
+ if (LeftIsVector) {
+ auto MIB =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SPIRV::OpVectorShuffle))
+ .addDef(LeftSideIn)
+ .addUse(GR.getSPIRVTypeID(LeftVecOpType))
+ .addUse(SrcReg)
+ // Per the spec, repeat the vector if only one vec is needed
+ .addUse(SrcReg);
+
+ for (unsigned J = 0; J < LeftComponentCount; J++) {
+ MIB.addImm(J);
+ }
+
+ Result = MIB.constrainAllUses(TII, TRI, RBI);
+ } else {
+ Result =
+ selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero},
+ SPIRV::OpVectorExtractDynamic);
+ }
+
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SPIRV::OpVectorShuffle))
+ .addDef(RightSideIn)
+ .addUse(GR.getSPIRVTypeID(RightVecOpType))
+ .addUse(SrcReg)
+ // Per the spec, repeat the vector if only one vec is needed
+ .addUse(SrcReg);
+
+ for (unsigned J = LeftComponentCount; J < ComponentCount; J++) {
+ MIB.addImm(J);
+ }
+
+ Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+ // Recursively call selectFirstBitSet64 on the 2 registers
+ Register LeftSideOut =
+ MRI->createVirtualRegister(GR.getRegClass(LeftVecResType));
+ Register RightSideOut =
+ MRI->createVirtualRegister(GR.getRegClass(RightVecResType));
+ Result = Result &&
+ selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn,
+ BitSetOpcode, SwapPrimarySide);
+ Result = Result &&
+ selectFirstBitSet64(RightSideOut, RightVecResType, I, RightSideIn,
+ BitSetOpcode, SwapPrimarySide);
+
+ // Join the two resulting registers back into the return type
+ // (ie i32x2, i32x2 -> i32x4)
+ return Result &&
+ selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
+ SPIRV::OpCompositeConstruct);
+ }
+
+ // 1. Split int64 into 2 pieces using a bitcast
MachineIRBuilder MIRBuilder(I);
SPIRVType *PostCastType =
GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder);
Register BitcastReg =
MRI->createVirtualRegister(GR.getRegClass(PostCastType));
bool Result =
- selectOpWithSrcs(BitcastReg, PostCastType, I, {OpReg}, SPIRV::OpBitcast);
+ selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg}, SPIRV::OpBitcast);
// 2. Find the first set bit from the primary side for all the pieces in #1
Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
@@ -3198,20 +3291,15 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg,
Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- bool ZeroAsNull = STI.isOpenCLEnv();
bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
if (IsScalarRes) {
// if scalar do a vector extract
- Result = Result &&
- selectOpWithSrcs(HighReg, ResType, I,
- {FBSReg, GR.getOrCreateConstInt(0, I, ResType,
- TII, ZeroAsNull)},
- SPIRV::OpVectorExtractDynamic);
- Result = Result &&
- selectOpWithSrcs(LowReg, ResType, I,
- {FBSReg, GR.getOrCreateConstInt(1, I, ResType,
- TII, ZeroAsNull)},
- SPIRV::OpVectorExtractDynamic);
+ Result =
+ Result && selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero},
+ SPIRV::OpVectorExtractDynamic);
+ Result =
+ Result && selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne},
+ SPIRV::OpVectorExtractDynamic);
} else {
// if vector do a shufflevector
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
@@ -3324,7 +3412,7 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
case 32:
return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
case 64:
- return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode,
+ return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
/*SwapPrimarySide=*/false);
default:
report_fatal_error(
@@ -3350,7 +3438,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
case 32:
return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
case 64:
- return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode,
+ return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
/*SwapPrimarySide=*/true);
default:
report_fatal_error("spv_firstbitlow only supports 16,32,64 bits.");
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
index 05488479e5bd0f..f3cc73637b1360 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
@@ -5,6 +5,7 @@
; CHECK-DAG: OpMemoryModel Logical GLSL450
; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0
; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2
+; CHECK-DAG: [[u32x3_t:%.+]] = OpTypeVector [[u32_t]] 3
; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4
; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0
; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]]
@@ -15,8 +16,12 @@
; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]]
; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0
; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2
+; CHECK-DAG: [[u16x3_t:%.+]] = OpTypeVector [[u16_t]] 3
+; CHECK-DAG: [[u16x4_t:%.+]] = OpTypeVector [[u16_t]] 4
; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0
; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2
+; CHECK-DAG: [[u64x3_t:%.+]] = OpTypeVector [[u64_t]] 3
+; CHECK-DAG: [[u64x4_t:%.+]] = OpTypeVector [[u64_t]] 4
; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool
; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector [[bool_t]] 2
@@ -30,8 +35,8 @@ entry:
ret i32 %elt.firstbitlow
}
-; CHECK-LABEL: Begin function firstbitlow_2xi32
-define noundef <2 x i32> @firstbitlow_2xi32(<2 x i32> noundef %a) {
+; CHECK-LABEL: Begin function firstbitlow_v2xi32
+define noundef <2 x i32> @firstbitlow_v2xi32(<2 x i32> noundef %a) {
entry:
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]]
; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a]]
@@ -40,6 +45,26 @@ entry:
ret <2 x i32> %elt.firstbitlow
}
+; CHECK-LABEL: Begin function firstbitlow_v3xi32
+define noundef <3 x i32> @firstbitlow_v3xi32(<3 x i32> noundef %a) {
+entry:
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x3_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindILsb [[a]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i32(<3 x i32> %a)
+ ret <3 x i32> %elt.firstbitlow
+}
+
+; CHECK-LABEL: Begin function firstbitlow_v4xi32
+define noundef <4 x i32> @firstbitlow_v4xi32(<4 x i32> noundef %a) {
+entry:
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x4_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i32(<4 x i32> %a)
+ ret <4 x i32> %elt.firstbitlow
+}
+
; CHECK-LABEL: Begin function firstbitlow_i16
define noundef i32 @firstbitlow_i16(i16 noundef %a) {
entry:
@@ -62,6 +87,28 @@ entry:
ret <2 x i32> %elt.firstbitlow
}
+; CHECK-LABEL: Begin function firstbitlow_v3xi16
+define noundef <3 x i32> @firstbitlow_v3xi16(<3 x i16> noundef %a) {
+entry:
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x3_t]]
+; CHECK: [[a32:%.+]] = OpUConvert [[u32x3_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindILsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i16(<3 x i16> %a)
+ ret <3 x i32> %elt.firstbitlow
+}
+
+; CHECK-LABEL: Begin function firstbitlow_v4xi16
+define noundef <4 x i32> @firstbitlow_v4xi16(<4 x i16> noundef %a) {
+entry:
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x4_t]]
+; CHECK: [[a32:%.+]] = OpUConvert [[u32x4_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i16(<4 x i16> %a)
+ ret <4 x i32> %elt.firstbitlow
+}
+
; CHECK-LABEL: Begin function firstbitlow_i64
define noundef i32 @firstbitlow_i64(i64 noundef %a) {
entry:
@@ -96,6 +143,74 @@ entry:
ret <2 x i32> %elt.firstbitlow
}
+; CHECK-LABEL: Begin function firstbitlow_v3i64
+define noundef <3 x i32> @firstbitlow_v3i64(<3 x i64> noundef %a) {
+entry:
+; Split the i64x3 into i64, i64x2
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]]
+; CHECK: [[left:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_0]]
+; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 1 2
+
+; Do firstbitlow on i64, i64x2
+; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x2_t]] [[left]]
+; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[left_cast]]
+; CHECK: [[left_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_0]]
+; CHECK: [[left_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_1]]
+; CHECK: [[left_should_use_high:%.+]] = OpIEqual [[bool_t]] [[left_low_bits]] [[const_neg1]]
+; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32_t]] [[left_should_use_high]] [[left_high_bits]] [[left_low_bits]]
+; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32_t]] [[left_should_use_high]] [[const_32]] [[const_0]]
+; CHECK: [[left_res:%.+]] = OpIAdd [[u32_t]] [[left_ans_offset]] [[left_ans_bits]]
+
+; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]]
+; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[right_cast]]
+; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2
+; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3
+; CHECK: [[right_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[right_low_bits]] [[const_neg1x2]]
+; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[right_high_bits]] [[right_low_bits]]
+; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[const_32x2]] [[const_0x2]]
+; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]]
+
+; Merge the resulting i32, i32x2 into the final i32x3 and return it
+; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[left_res]] [[right_res]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i64(<3 x i64> %a)
+ ret <3 x i32> %elt.firstbitlow
+}
+
+; CHECK-LABEL: Begin function firstbitlow_v4i64
+define noundef <4 x i32> @firstbitlow_v4i64(<4 x i64> noundef %a) {
+entry:
+; Split the i64x4 into 2 i64x2
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]]
+; CHECK: [[left:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1
+; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3
+
+; Do firstbitlow on the 2 i64x2
+; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x4_t]] [[left]]
+; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[left_cast]]
+; CHECK: [[left_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 0 2
+; CHECK: [[left_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 1 3
+; CHECK: [[left_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[left_low_bits]] [[const_neg1x2]]
+; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_high]] [[left_high_bits]] [[left_low_bits]]
+; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_high]] [[const_32x2]] [[const_0x2]]
+; CHECK: [[left_res:%.+]] = OpIAdd [[u32x2_t]] [[left_ans_offset]] [[left_ans_bits]]
+
+; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]]
+; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[right_cast]]
+; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2
+; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3
+; CHECK: [[right_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[right_low_bits]] [[const_neg1x2]]
+; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[right_high_bits]] [[right_low_bits]]
+; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[const_32x2]] [[const_0x2]]
+; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]]
+
+; Merge the resulting 2 i32x2 into the final i32x4 and return it
+; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[left_res]] [[right_res]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i64(<4 x i64> %a)
+ ret <4 x i32> %elt.firstbitlow
+}
+
;declare i16 @llvm.spv.firstbitlow.i16(i16)
;declare i32 @llvm.spv.firstbitlow.i32(i32)
;declare i64 @llvm.spv.firstbitlow.i64(i64)
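The long-vector handling exercised by the v3i64/v4i64 tests above follows the same divide-and-recombine shape as this illustrative C++ sketch, which reuses the firstBitLow64 helper sketched earlier (again, these names are assumptions for the example, not the selector's API):

#include <cstdint>
#include <vector>

uint32_t firstBitLow64(uint64_t V); // scalar sketch from above, assumed here

// SPIR-V vectors cap at 4 components and the i64 lowering doubles the
// component count (i64xN -> i32x2N), so anything wider than 2 components is
// halved, handled recursively, and stitched back together.
static std::vector<uint32_t> firstBitLowVec64(const std::vector<uint64_t> &V) {
  if (V.size() > 2) {
    std::vector<uint64_t> Left(V.begin(), V.begin() + V.size() / 2);
    std::vector<uint64_t> Right(V.begin() + V.size() / 2, V.end());
    std::vector<uint32_t> Res = firstBitLowVec64(Left);
    std::vector<uint32_t> RightRes = firstBitLowVec64(Right);
    Res.insert(Res.end(), RightRes.begin(), RightRes.end());
    return Res; // e.g. i32x2 + i32x2 -> i32x4, or i32x1 + i32x2 -> i32x3
  }
  std::vector<uint32_t> Res;
  for (uint64_t E : V)
    Res.push_back(firstBitLow64(E)); // per-element 64-bit lowering
  return Res;
}

A 4-element input splits into two 2-element halves, and a 3-element input into a scalar plus a 2-element half, which matches the OpVectorExtractDynamic / OpVectorShuffle split the tests above check for.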
>From e67adb99590fcc2fe256ec04e0f31c39ea315ab8 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Mon, 16 Dec 2024 10:59:17 -0700
Subject: [PATCH 07/11] Address comments
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 86 ++++++++++++-------
1 file changed, 55 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index b2115528b8dcb4..4588c3bcd2e772 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -105,12 +105,17 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectFirstBitSet32(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I, Register SrcReg,
- unsigned Opcode) const;
+ unsigned BitSetOpcode) const;
bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I, Register SrcReg,
unsigned BitSetOpcode, bool SwapPrimarySide) const;
+ bool selectFirstBitSet64Overflow(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, Register SrcReg,
+ unsigned BitSetOpcode,
+ bool SwapPrimarySide) const;
+
bool selectGlobalValue(Register ResVReg, MachineInstr &I,
const MachineInstr *Init = nullptr) const;
@@ -3157,51 +3162,42 @@ bool SPIRVInstructionSelector::selectFirstBitSet16(
selectFirstBitSet32(ResVReg, ResType, I, ExtReg, BitSetOpcode);
}
-bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- Register SrcReg,
- unsigned Opcode) const {
+bool SPIRVInstructionSelector::selectFirstBitSet32(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ Register SrcReg, unsigned BitSetOpcode) const {
return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
- .addImm(Opcode)
+ .addImm(BitSetOpcode)
.addUse(SrcReg)
.constrainAllUses(TII, TRI, RBI);
}
-bool SPIRVInstructionSelector::selectFirstBitSet64(
+bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+
unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
bool ZeroAsNull = STI.isOpenCLEnv();
Register ConstIntZero =
GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
- Register ConstIntOne =
- GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
+ unsigned LeftComponentCount = ComponentCount / 2;
+ unsigned RightComponentCount = ComponentCount - LeftComponentCount;
+ bool LeftIsVector = LeftComponentCount > 1;
- // SPIRV doesn't support vectors with more than 4 components. Since the
-  // algorithm below converts i64 -> i32x2 and i64x4 -> i32x8, it can only
-  // operate on vectors with 2 or fewer components. When larger vectors are
-  // seen, split them, recurse, then recombine them.
- if (ComponentCount > 2) {
- unsigned LeftComponentCount = ComponentCount / 2;
- unsigned RightComponentCount = ComponentCount - LeftComponentCount;
- bool LeftIsVector = LeftComponentCount > 1;
-
- // Split the SrcReg in half into 2 smaller vec registers
- // (ie i64x4 -> i64x2, i64x2)
- MachineIRBuilder MIRBuilder(I);
- SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
- SPIRVType *LeftVecOpType;
- SPIRVType *LeftVecResType;
- if (LeftIsVector) {
- LeftVecOpType =
- GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
- LeftVecResType = GR.getOrCreateSPIRVVectorType(
- BaseType, LeftComponentCount, MIRBuilder);
+ // Split the SrcReg in half into 2 smaller vec registers
+ // (ie i64x4 -> i64x2, i64x2)
+ MachineIRBuilder MIRBuilder(I);
+ SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
+ SPIRVType *LeftVecOpType;
+ SPIRVType *LeftVecResType;
+ if (LeftIsVector) {
+ LeftVecOpType =
+ GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
+ LeftVecResType =
+ GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder);
} else {
LeftVecOpType = OpType;
LeftVecResType = BaseType;
@@ -3219,6 +3215,8 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
bool Result;
+ // Extract the left half from the SrcReg into LeftSideIn
+ // accounting for the special case when it only has one element
if (LeftIsVector) {
auto MIB =
BuildMI(*I.getParent(), I, I.getDebugLoc(),
@@ -3240,6 +3238,9 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
SPIRV::OpVectorExtractDynamic);
}
+ // Extract the right half from the SrcReg into RightSideIn.
+  // Right will always be a vector because an odd split only happens when
+  // ComponentCount == 3, and in that case the lone scalar goes to Left.
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(SPIRV::OpVectorShuffle))
.addDef(RightSideIn)
@@ -3254,7 +3255,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
- // Recursively call selectFirstBitSet64 on the 2 registers
+ // Recursively call selectFirstBitSet64 on the 2 halves
Register LeftSideOut =
MRI->createVirtualRegister(GR.getRegClass(LeftVecResType));
Register RightSideOut =
@@ -3271,6 +3272,26 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
return Result &&
selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
SPIRV::OpCompositeConstruct);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitSet64(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+ unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+ SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+ bool ZeroAsNull = STI.isOpenCLEnv();
+ Register ConstIntZero =
+ GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
+ Register ConstIntOne =
+ GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
+
+ // SPIRV doesn't support vectors with more than 4 components. Since the
+  // algorithm below converts i64 -> i32x2 and i64x4 -> i32x8, it can only
+  // operate on vectors with 2 or fewer components. When larger vectors are
+  // seen, split them, recurse, then recombine them.
+ if (ComponentCount > 2) {
+ return selectFirstBitSet64Overflow(ResVReg, ResType, I, SrcReg,
+ BitSetOpcode, SwapPrimarySide);
}
// 1. Split int64 into 2 pieces using a bitcast
@@ -3362,6 +3383,9 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
Register SecondaryReg;
Register PrimaryShiftReg;
Register SecondaryShiftReg;
+
+ // By default the emitted opcodes check for the set bit from the MSB side.
+ // Setting SwapPrimarySide checks the set bit from the LSB side
if (SwapPrimarySide) {
PrimaryReg = LowReg;
SecondaryReg = HighReg;
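To illustrate what SwapPrimarySide toggles, here is a matching firstbithigh sketch in the same illustrative C++ style as before, with findMsb32 standing in for the FindUMsb/FindSMsb result (names are made up for the example):

#include <cstdint>

// Stand-in for FindUMsb on a 32-bit value: index of the highest set bit,
// or 0xFFFFFFFF when no bit is set.
static uint32_t findMsb32(uint32_t V) {
  for (int I = 31; I >= 0; --I)
    if (V & (1u << I))
      return static_cast<uint32_t>(I);
  return 0xFFFFFFFFu;
}

// SwapPrimarySide == false: the high piece is primary. When it has no set
// bit, fall back to the low piece with offset 0; otherwise use high + 32.
static uint32_t firstBitHigh64(uint64_t V) {
  uint32_t Low = findMsb32(static_cast<uint32_t>(V));
  uint32_t High = findMsb32(static_cast<uint32_t>(V >> 32));
  bool UseLow = (High == 0xFFFFFFFFu); // high piece had no set bit
  return (UseLow ? 0u : 32u) + (UseLow ? Low : High);
}

In the firstbitlow sketch earlier the roles are swapped: the low piece is primary with offset 0 and the high piece is the +32 fallback.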
>From 7b1a8ccb9bef76f39947118e9236231f66ed5712 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Mon, 16 Dec 2024 11:09:19 -0700
Subject: [PATCH 08/11] format
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 119 +++++++++---------
1 file changed, 59 insertions(+), 60 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 4588c3bcd2e772..9fe14bc415e041 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3198,80 +3198,79 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
LeftVecResType =
GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder);
- } else {
- LeftVecOpType = OpType;
- LeftVecResType = BaseType;
- }
-
- SPIRVType *RightVecOpType =
- GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder);
- SPIRVType *RightVecResType = GR.getOrCreateSPIRVVectorType(
- BaseType, RightComponentCount, MIRBuilder);
-
- Register LeftSideIn =
- MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType));
- Register RightSideIn =
- MRI->createVirtualRegister(GR.getRegClass(RightVecOpType));
-
- bool Result;
+ } else {
+ LeftVecOpType = OpType;
+ LeftVecResType = BaseType;
+ }
- // Extract the left half from the SrcReg into LeftSideIn
- // accounting for the special case when it only has one element
- if (LeftIsVector) {
- auto MIB =
- BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(SPIRV::OpVectorShuffle))
- .addDef(LeftSideIn)
- .addUse(GR.getSPIRVTypeID(LeftVecOpType))
- .addUse(SrcReg)
- // Per the spec, repeat the vector if only one vec is needed
- .addUse(SrcReg);
+ SPIRVType *RightVecOpType =
+ GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder);
+ SPIRVType *RightVecResType =
+ GR.getOrCreateSPIRVVectorType(BaseType, RightComponentCount, MIRBuilder);
- for (unsigned J = 0; J < LeftComponentCount; J++) {
- MIB.addImm(J);
- }
+ Register LeftSideIn =
+ MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType));
+ Register RightSideIn =
+ MRI->createVirtualRegister(GR.getRegClass(RightVecOpType));
- Result = MIB.constrainAllUses(TII, TRI, RBI);
- } else {
- Result =
- selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero},
- SPIRV::OpVectorExtractDynamic);
- }
+ bool Result;
- // Extract the right half from the SrcReg into RightSideIn.
-  // Right will always be a vector because an odd split only happens when
-  // ComponentCount == 3, and in that case the lone scalar goes to Left.
+ // Extract the left half from the SrcReg into LeftSideIn
+ // accounting for the special case when it only has one element
+ if (LeftIsVector) {
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(SPIRV::OpVectorShuffle))
- .addDef(RightSideIn)
- .addUse(GR.getSPIRVTypeID(RightVecOpType))
+ .addDef(LeftSideIn)
+ .addUse(GR.getSPIRVTypeID(LeftVecOpType))
.addUse(SrcReg)
// Per the spec, repeat the vector if only one vec is needed
.addUse(SrcReg);
- for (unsigned J = LeftComponentCount; J < ComponentCount; J++) {
+ for (unsigned J = 0; J < LeftComponentCount; J++) {
MIB.addImm(J);
}
- Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+ Result = MIB.constrainAllUses(TII, TRI, RBI);
+ } else {
+ Result =
+ selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero},
+ SPIRV::OpVectorExtractDynamic);
+ }
- // Recursively call selectFirstBitSet64 on the 2 halves
- Register LeftSideOut =
- MRI->createVirtualRegister(GR.getRegClass(LeftVecResType));
- Register RightSideOut =
- MRI->createVirtualRegister(GR.getRegClass(RightVecResType));
- Result = Result &&
- selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn,
- BitSetOpcode, SwapPrimarySide);
- Result = Result &&
- selectFirstBitSet64(RightSideOut, RightVecResType, I, RightSideIn,
- BitSetOpcode, SwapPrimarySide);
-
- // Join the two resulting registers back into the return type
- // (ie i32x2, i32x2 -> i32x4)
- return Result &&
- selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
- SPIRV::OpCompositeConstruct);
+ // Extract the right half from the SrcReg into RightSideIn.
+  // Right will always be a vector because an odd split only happens when
+  // ComponentCount == 3, and in that case the lone scalar goes to Left.
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SPIRV::OpVectorShuffle))
+ .addDef(RightSideIn)
+ .addUse(GR.getSPIRVTypeID(RightVecOpType))
+ .addUse(SrcReg)
+ // Per the spec, repeat the vector if only one vec is needed
+ .addUse(SrcReg);
+
+ for (unsigned J = LeftComponentCount; J < ComponentCount; J++) {
+ MIB.addImm(J);
+ }
+
+ Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+ // Recursively call selectFirstBitSet64 on the 2 halves
+ Register LeftSideOut =
+ MRI->createVirtualRegister(GR.getRegClass(LeftVecResType));
+ Register RightSideOut =
+ MRI->createVirtualRegister(GR.getRegClass(RightVecResType));
+ Result =
+ Result && selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn,
+ BitSetOpcode, SwapPrimarySide);
+ Result =
+ Result && selectFirstBitSet64(RightSideOut, RightVecResType, I,
+ RightSideIn, BitSetOpcode, SwapPrimarySide);
+
+ // Join the two resulting registers back into the return type
+ // (ie i32x2, i32x2 -> i32x4)
+ return Result &&
+ selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
+ SPIRV::OpCompositeConstruct);
}
bool SPIRVInstructionSelector::selectFirstBitSet64(
>From 742647b68a4c676b059a67e462d4399677756742 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Mon, 16 Dec 2024 14:02:52 -0700
Subject: [PATCH 09/11] Address comments
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 40 +++++++++----------
1 file changed, 19 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9fe14bc415e041..3872409be44c62 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3191,27 +3191,26 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
// (ie i64x4 -> i64x2, i64x2)
MachineIRBuilder MIRBuilder(I);
SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
- SPIRVType *LeftVecOpType;
- SPIRVType *LeftVecResType;
+ SPIRVType *LeftOpType;
+ SPIRVType *LeftResType;
if (LeftIsVector) {
- LeftVecOpType =
+ LeftOpType =
GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
- LeftVecResType =
+ LeftResType =
GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder);
} else {
- LeftVecOpType = OpType;
- LeftVecResType = BaseType;
+ LeftOpType = OpType;
+ LeftResType = BaseType;
}
- SPIRVType *RightVecOpType =
+ SPIRVType *RightOpType =
GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder);
- SPIRVType *RightVecResType =
+ SPIRVType *RightResType =
GR.getOrCreateSPIRVVectorType(BaseType, RightComponentCount, MIRBuilder);
- Register LeftSideIn =
- MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType));
+ Register LeftSideIn = MRI->createVirtualRegister(GR.getRegClass(LeftOpType));
Register RightSideIn =
- MRI->createVirtualRegister(GR.getRegClass(RightVecOpType));
+ MRI->createVirtualRegister(GR.getRegClass(RightOpType));
bool Result;
@@ -3221,7 +3220,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(SPIRV::OpVectorShuffle))
.addDef(LeftSideIn)
- .addUse(GR.getSPIRVTypeID(LeftVecOpType))
+ .addUse(GR.getSPIRVTypeID(LeftOpType))
.addUse(SrcReg)
// Per the spec, repeat the vector if only one vec is needed
.addUse(SrcReg);
@@ -3232,9 +3231,8 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
Result = MIB.constrainAllUses(TII, TRI, RBI);
} else {
- Result =
- selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero},
- SPIRV::OpVectorExtractDynamic);
+ Result = selectOpWithSrcs(LeftSideIn, LeftOpType, I, {SrcReg, ConstIntZero},
+ SPIRV::OpVectorExtractDynamic);
}
// Extract the right half from the SrcReg into RightSideIn.
@@ -3243,7 +3241,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(SPIRV::OpVectorShuffle))
.addDef(RightSideIn)
- .addUse(GR.getSPIRVTypeID(RightVecOpType))
+ .addUse(GR.getSPIRVTypeID(RightOpType))
.addUse(SrcReg)
// Per the spec, repeat the vector if only one vec is needed
.addUse(SrcReg);
@@ -3256,15 +3254,15 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
// Recursively call selectFirstBitSet64 on the 2 halves
Register LeftSideOut =
- MRI->createVirtualRegister(GR.getRegClass(LeftVecResType));
+ MRI->createVirtualRegister(GR.getRegClass(LeftResType));
Register RightSideOut =
- MRI->createVirtualRegister(GR.getRegClass(RightVecResType));
+ MRI->createVirtualRegister(GR.getRegClass(RightResType));
Result =
- Result && selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn,
+ Result && selectFirstBitSet64(LeftSideOut, LeftResType, I, LeftSideIn,
BitSetOpcode, SwapPrimarySide);
Result =
- Result && selectFirstBitSet64(RightSideOut, RightVecResType, I,
- RightSideIn, BitSetOpcode, SwapPrimarySide);
+ Result && selectFirstBitSet64(RightSideOut, RightResType, I, RightSideIn,
+ BitSetOpcode, SwapPrimarySide);
// Join the two resulting registers back into the return type
// (ie i32x2, i32x2 -> i32x4)
>From 553335fb8f2e43bee60ec3c8d19e925231d215c1 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Mon, 16 Dec 2024 17:54:47 -0700
Subject: [PATCH 10/11] Update tests
---
.../SPIRV/hlsl-intrinsics/firstbithigh.ll | 236 +++++++++++++++---
.../SPIRV/hlsl-intrinsics/firstbitlow.ll | 16 +-
2 files changed, 204 insertions(+), 48 deletions(-)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll
index 3d35e102310f50..dee48061d2fe10 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll
@@ -1,94 +1,250 @@
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-; CHECK: OpMemoryModel Logical GLSL450
-; CHECK-DAG: [[Z:%.*]] = OpConstant %[[#]] 0
-; CHECK-DAG: [[X:%.*]] = OpConstant %[[#]] 1
+; CHECK-DAG: [[glsl_450_ext:%.+]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: OpMemoryModel Logical GLSL450
+; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0
+; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2
+; CHECK-DAG: [[u32x3_t:%.+]] = OpTypeVector [[u32_t]] 3
+; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4
+; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0
+; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]]
+; CHECK-DAG: [[const_1:%.*]] = OpConstant [[u32_t]] 1
+; CHECK-DAG: [[const_32:%.*]] = OpConstant [[u32_t]] 32
+; CHECK-DAG: [[const_32x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_32]] [[const_32]]
+; CHECK-DAG: [[const_neg1:%.*]] = OpConstant [[u32_t]] 4294967295
+; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]]
+; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0
+; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2
+; CHECK-DAG: [[u16x3_t:%.+]] = OpTypeVector [[u16_t]] 3
+; CHECK-DAG: [[u16x4_t:%.+]] = OpTypeVector [[u16_t]] 4
+; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0
+; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2
+; CHECK-DAG: [[u64x3_t:%.+]] = OpTypeVector [[u64_t]] 3
+; CHECK-DAG: [[u64x4_t:%.+]] = OpTypeVector [[u64_t]] 4
+; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool
+; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector [[bool_t]] 2
+; CHECK-LABEL: Begin function firstbituhigh_i32
define noundef i32 @firstbituhigh_i32(i32 noundef %a) {
entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb %[[#]]
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindUMsb [[a]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i32(i32 %a)
ret i32 %elt.firstbituhigh
}
-define noundef <2 x i32> @firstbituhigh_2xi32(<2 x i32> noundef %a) {
+; CHECK-LABEL: Begin function firstbituhigh_v2xi32
+define noundef <2 x i32> @firstbituhigh_v2xi32(<2 x i32> noundef %a) {
entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb %[[#]]
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i32(<2 x i32> %a)
ret <2 x i32> %elt.firstbituhigh
}
+; CHECK-LABEL: Begin function firstbituhigh_v3xi32
+define noundef <3 x i32> @firstbituhigh_v3xi32(<3 x i32> noundef %a) {
+entry:
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x3_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindUMsb [[a]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i32(<3 x i32> %a)
+ ret <3 x i32> %elt.firstbituhigh
+}
+
+; CHECK-LABEL: Begin function firstbituhigh_v4xi32
+define noundef <4 x i32> @firstbituhigh_v4xi32(<4 x i32> noundef %a) {
+entry:
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x4_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i32(<4 x i32> %a)
+ ret <4 x i32> %elt.firstbituhigh
+}
+
+; CHECK-LABEL: Begin function firstbituhigh_i16
define noundef i32 @firstbituhigh_i16(i16 noundef %a) {
entry:
-; CHECK: [[A:%.*]] = OpUConvert %[[#]]
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb [[A]]
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]]
+; CHECK: [[a32:%.+]] = OpUConvert [[u32_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindUMsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i16(i16 %a)
ret i32 %elt.firstbituhigh
}
-define noundef <2 x i32> @firstbituhigh_v2i16(<2 x i16> noundef %a) {
+; CHECK-LABEL: Begin function firstbituhigh_v2xi16
+define noundef <2 x i32> @firstbituhigh_v2xi16(<2 x i16> noundef %a) {
entry:
-; CHECK: [[A:%.*]] = OpUConvert %[[#]]
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb [[A]]
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x2_t]]
+; CHECK: [[a32:%.+]] = OpUConvert [[u32x2_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i16(<2 x i16> %a)
ret <2 x i32> %elt.firstbituhigh
}
+; CHECK-LABEL: Begin function firstbituhigh_v3xi16
+define noundef <3 x i32> @firstbituhigh_v3xi16(<3 x i16> noundef %a) {
+entry:
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x3_t]]
+; CHECK: [[a32:%.+]] = OpUConvert [[u32x3_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindUMsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i16(<3 x i16> %a)
+ ret <3 x i32> %elt.firstbituhigh
+}
+
+; CHECK-LABEL: Begin function firstbituhigh_v4xi16
+define noundef <4 x i32> @firstbituhigh_v4xi16(<4 x i16> noundef %a) {
+entry:
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x4_t]]
+; CHECK: [[a32:%.+]] = OpUConvert [[u32x4_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i16(<4 x i16> %a)
+ ret <4 x i32> %elt.firstbituhigh
+}
+
+; CHECK-LABEL: Begin function firstbituhigh_i64
define noundef i32 @firstbituhigh_i64(i64 noundef %a) {
entry:
-; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]]
-; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindUMsb [[O]]
-; CHECK: [[M:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[Z]]
-; CHECK: [[L:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[X]]
-; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]]
-; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]]
-; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]]
-; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]]
+; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]]
+; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]]
+; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a32x2]]
+; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]]
+; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]]
+; CHECK: [[should_use_low:%.+]] = OpIEqual [[bool_t]] [[high_bits]] [[const_neg1]]
+; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[low_bits]] [[high_bits]]
+; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[const_0]] [[const_32]]
+; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i64(i64 %a)
ret i32 %elt.firstbituhigh
}
-define noundef <2 x i32> @firstbituhigh_v2i64(<2 x i64> noundef %a) {
+; CHECK-LABEL: Begin function firstbituhigh_v2xi64
+define noundef <2 x i32> @firstbituhigh_v2xi64(<2 x i64> noundef %a) {
entry:
-; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]]
-; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindUMsb [[O]]
-; CHECK: [[M:%.*]] = OpVectorShuffle %[[#]] [[N]] [[N]] 0
-; CHECK: [[L:%.*]] = OpVectorShuffle %[[#]] [[N]] [[N]] 1
-; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]]
-; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]]
-; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]]
-; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]]
-; CHECK: OpReturnValue [[B]]
+; CHECK: [[a64x2:%.+]] = OpFunctionParameter [[u64x2_t]]
+; CHECK: [[a32x4:%.+]] = OpBitcast [[u32x4_t]] [[a64x2]]
+; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a32x4]]
+; CHECK: [[high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 0 2
+; CHECK: [[low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 1 3
+; CHECK: [[should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[high_bits]] [[const_neg1x2]]
+; CHECK: [[ans_bits:%.+]] = OpSelect [[u32x2_t]] [[should_use_low]] [[low_bits]] [[high_bits]]
+; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_low]] [[const_0x2]] [[const_32x2]]
+; CHECK: [[ret:%.+]] = OpIAdd [[u32x2_t]] [[ans_offset]] [[ans_bits]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i64(<2 x i64> %a)
ret <2 x i32> %elt.firstbituhigh
}
+; CHECK-LABEL: Begin function firstbituhigh_v3xi64
+define noundef <3 x i32> @firstbituhigh_v3xi64(<3 x i64> noundef %a) {
+entry:
+; Split the i64x3 into i64, i64x2
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]]
+; CHECK: [[left:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_0]]
+; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 1 2
+
+; Do firstbituhigh on i64, i64x2
+; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x2_t]] [[left]]
+; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[left_cast]]
+; CHECK: [[left_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_0]]
+; CHECK: [[left_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_1]]
+; CHECK: [[left_should_use_low:%.+]] = OpIEqual [[bool_t]] [[left_high_bits]] [[const_neg1]]
+; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32_t]] [[left_should_use_low]] [[left_low_bits]] [[left_high_bits]]
+; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32_t]] [[left_should_use_low]] [[const_0]] [[const_32]]
+; CHECK: [[left_res:%.+]] = OpIAdd [[u32_t]] [[left_ans_offset]] [[left_ans_bits]]
+
+; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]]
+; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[right_cast]]
+; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2
+; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3
+; CHECK: [[right_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[right_high_bits]] [[const_neg1x2]]
+; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[right_low_bits]] [[right_high_bits]]
+; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[const_0x2]] [[const_32x2]]
+; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]]
+
+; Merge the resulting i32, i32x2 into the final i32x3 and return it
+; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[left_res]] [[right_res]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i64(<3 x i64> %a)
+ ret <3 x i32> %elt.firstbituhigh
+}
+
+; CHECK-LABEL: Begin function firstbituhigh_v4xi64
+define noundef <4 x i32> @firstbituhigh_v4xi64(<4 x i64> noundef %a) {
+entry:
+; Split the i64x4 into 2 i64x2
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]]
+; CHECK: [[left:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1
+; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3
+
+; Do firstbituhigh on the 2 i64x2
+; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x4_t]] [[left]]
+; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[left_cast]]
+; CHECK: [[left_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 0 2
+; CHECK: [[left_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 1 3
+; CHECK: [[left_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[left_high_bits]] [[const_neg1x2]]
+; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_low]] [[left_low_bits]] [[left_high_bits]]
+; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_low]] [[const_0x2]] [[const_32x2]]
+; CHECK: [[left_res:%.+]] = OpIAdd [[u32x2_t]] [[left_ans_offset]] [[left_ans_bits]]
+
+; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]]
+; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[right_cast]]
+; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2
+; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3
+; CHECK: [[right_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[right_high_bits]] [[const_neg1x2]]
+; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[right_low_bits]] [[right_high_bits]]
+; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[const_0x2]] [[const_32x2]]
+; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]]
+
+; Merge the resulting 2 i32x2 into the final i32x4 and return it
+; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[left_res]] [[right_res]]
+; CHECK: OpReturnValue [[ret]]
+ %elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i64(<4 x i64> %a)
+ ret <4 x i32> %elt.firstbituhigh
+}
+
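
For reference, the two functions above exercise the overflow path for vectors with more than two i64 lanes: the input is split into halves that each fit the two-lane lowering, each half is processed independently, and the partial results are recombined with OpCompositeConstruct. A rough C++ sketch of how the split point could be chosen; the formula here is an assumption that happens to match the v3 (1 + 2) and v4 (2 + 2) cases these tests check, and the helper name is illustrative, not the selector's API.

#include <cassert>
#include <utility>

// Illustrative only: divide an N-lane i64 vector (N > 2) so that each half
// has at most two lanes. A one-lane left half is handled as a scalar, which
// matches the selector's single-element special case later in this patch.
static std::pair<unsigned, unsigned> splitComponents(unsigned ComponentCount) {
  assert(ComponentCount > 2 && "two or fewer i64 lanes use the direct lowering");
  unsigned LeftComponentCount = ComponentCount / 2;                   // v3 -> 1, v4 -> 2
  unsigned RightComponentCount = ComponentCount - LeftComponentCount; // v3 -> 2, v4 -> 2
  return {LeftComponentCount, RightComponentCount};
}
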
+; CHECK-LABEL: Begin function firstbitshigh_i32
define noundef i32 @firstbitshigh_i32(i32 noundef %a) {
entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindSMsb %[[#]]
+; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindSMsb [[a]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i32(i32 %a)
ret i32 %elt.firstbitshigh
}
+; CHECK-LABEL: Begin function firstbitshigh_i16
define noundef i32 @firstbitshigh_i16(i16 noundef %a) {
entry:
-; CHECK: [[A:%.*]] = OpSConvert %[[#]]
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindSMsb %[[#]]
+; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]]
+; CHECK: [[a32:%.+]] = OpSConvert [[u32_t]] [[a16]]
+; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindSMsb [[a32]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i16(i16 %a)
ret i32 %elt.firstbitshigh
}
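
For the 16-bit case above, the only extra work is the OpSConvert widening to 32 bits before the GLSL.std.450 lookup. A minimal C++ sketch of that step; findSMsb32 is a stand-in for the extended instruction, not a real API.

#include <cstdint>

// findSMsb32 stands in for GLSL.std.450 FindSMsb on a 32-bit value: for
// negative inputs it reports the most significant 0-bit, and it returns
// -1 (all ones) for 0 and -1. __builtin_clz is a GCC/Clang builtin.
static uint32_t findSMsb32(int32_t v) {
  uint32_t u = v < 0 ? ~static_cast<uint32_t>(v) : static_cast<uint32_t>(v);
  return u == 0 ? UINT32_MAX : 31u - static_cast<uint32_t>(__builtin_clz(u));
}

// The i16 overload only needs the widening before the lookup.
static uint32_t firstbitshigh_i16(int16_t a) {
  int32_t widened = a; // sign-extend, matching OpSConvert in the CHECK lines
  return findSMsb32(widened);
}
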
+; CHECK-LABEL: Begin function firstbitshigh_i64
define noundef i32 @firstbitshigh_i64(i64 noundef %a) {
entry:
-; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]]
-; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindSMsb [[O]]
-; CHECK: [[M:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[Z]]
-; CHECK: [[L:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[X]]
-; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]]
-; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]]
-; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]]
-; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]]
+; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]]
+; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]]
+; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindSMsb [[a32x2]]
+; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]]
+; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]]
+; CHECK: [[should_use_low:%.+]] = OpIEqual [[bool_t]] [[high_bits]] [[const_neg1]]
+; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[low_bits]] [[high_bits]]
+; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[const_0]] [[const_32]]
+; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]]
+; CHECK: OpReturnValue [[ret]]
%elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i64(i64 %a)
ret i32 %elt.firstbitshigh
}
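
The i64 case above is lowered without native 64-bit support: the value is bitcast to two 32-bit words, FindSMsb runs on both, and an OpSelect/OpIAdd sequence picks the word that actually produced a result, adding 32 when it is the upper word. A small C++ model of that select-and-add structure; which bitcast lane maps to the "high" word is an assumption that follows the variable names in the CHECK lines.

#include <cstdint>

// FindSMsb stand-in, repeated from the previous sketch so this block compiles
// on its own. __builtin_clz is a GCC/Clang builtin.
static uint32_t findSMsb32(int32_t v) {
  uint32_t u = v < 0 ? ~static_cast<uint32_t>(v) : static_cast<uint32_t>(v);
  return u == 0 ? UINT32_MAX : 31u - static_cast<uint32_t>(__builtin_clz(u));
}

// Models the OpBitcast / FindSMsb / OpIEqual / OpSelect / OpIAdd sequence
// verified above.
static uint32_t firstbitshigh_i64(int64_t a) {
  uint64_t bits = static_cast<uint64_t>(a);
  int32_t HighWord = static_cast<int32_t>(bits >> 32);
  int32_t LowWord = static_cast<int32_t>(bits & 0xffffffffu);

  uint32_t HighBits = findSMsb32(HighWord);
  uint32_t LowBits = findSMsb32(LowWord);

  bool UseLow = (HighBits == UINT32_MAX); // high word gave no answer (-1)
  uint32_t Bits = UseLow ? LowBits : HighBits;
  uint32_t Offset = UseLow ? 0u : 32u;
  return Offset + Bits;
}
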
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
index f3cc73637b1360..262cc2610600f8 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
@@ -76,8 +76,8 @@ entry:
ret i32 %elt.firstbitlow
}
-; CHECK-LABEL: Begin function firstbitlow_v2i16
-define noundef <2 x i32> @firstbitlow_v2i16(<2 x i16> noundef %a) {
+; CHECK-LABEL: Begin function firstbitlow_v2xi16
+define noundef <2 x i32> @firstbitlow_v2xi16(<2 x i16> noundef %a) {
entry:
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x2_t]]
; CHECK: [[a32:%.+]] = OpUConvert [[u32x2_t]] [[a16]]
@@ -126,8 +126,8 @@ entry:
ret i32 %elt.firstbitlow
}
-; CHECK-LABEL: Begin function firstbitlow_v2i64
-define noundef <2 x i32> @firstbitlow_v2i64(<2 x i64> noundef %a) {
+; CHECK-LABEL: Begin function firstbitlow_v2xi64
+define noundef <2 x i32> @firstbitlow_v2xi64(<2 x i64> noundef %a) {
entry:
; CHECK: [[a64x2:%.+]] = OpFunctionParameter [[u64x2_t]]
; CHECK: [[a32x4:%.+]] = OpBitcast [[u32x4_t]] [[a64x2]]
@@ -143,8 +143,8 @@ entry:
ret <2 x i32> %elt.firstbitlow
}
-; CHECK-LABEL: Begin function firstbitlow_v3i64
-define noundef <3 x i32> @firstbitlow_v3i64(<3 x i64> noundef %a) {
+; CHECK-LABEL: Begin function firstbitlow_v3xi64
+define noundef <3 x i32> @firstbitlow_v3xi64(<3 x i64> noundef %a) {
entry:
; Split the i64x3 into i64, i64x2
; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]]
@@ -177,8 +177,8 @@ entry:
ret <3 x i32> %elt.firstbitlow
}
-; CHECK-LABEL: Begin function firstbitlow_v4i64
-define noundef <4 x i32> @firstbitlow_v4i64(<4 x i64> noundef %a) {
+; CHECK-LABEL: Begin function firstbitlow_v4xi64
+define noundef <4 x i32> @firstbitlow_v4xi64(<4 x i64> noundef %a) {
entry:
; Split the i64x4 into 2 i64x2
; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]]
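
The firstbitlow tests renamed above use the same 64-bit strategy, but with the LSB-side GLSL.std.450 lookup and the roles of the two words swapped (the SwapPrimarySide path in the selector changes below): the low word is consulted first and 32 is added only when the answer has to come from the high word. A hedged C++ sketch of that variant; findILsb32 is an illustrative stand-in for the extended instruction.

#include <cstdint>

// Stand-in for the LSB-side lookup (FindILsb): bit index of the least
// significant 1-bit, or -1 (all ones) for 0. __builtin_ctz is a GCC/Clang
// builtin.
static uint32_t findILsb32(uint32_t v) {
  return v == 0 ? UINT32_MAX : static_cast<uint32_t>(__builtin_ctz(v));
}

// Same select/add shape as the firstbithigh lowering, with the low word as
// the primary side. Mirrors the OpSelect/OpIAdd structure and does not
// special-case an all-zero input.
static uint32_t firstbitlow_i64(uint64_t a) {
  uint32_t LowBits = findILsb32(static_cast<uint32_t>(a));
  uint32_t HighBits = findILsb32(static_cast<uint32_t>(a >> 32));

  bool UseHigh = (LowBits == UINT32_MAX); // low word had no set bit
  uint32_t Bits = UseHigh ? HighBits : LowBits;
  uint32_t Offset = UseHigh ? 32u : 0u;
  return Offset + Bits;
}
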
>From 525a6620435a26cc3eb2cc7bc25262d898780f90 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Tue, 17 Dec 2024 15:18:05 -0700
Subject: [PATCH 11/11] Address comments
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 105 +++++++++---------
1 file changed, 51 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 3872409be44c62..86d44705f0982e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3191,16 +3191,13 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
// (ie i64x4 -> i64x2, i64x2)
MachineIRBuilder MIRBuilder(I);
SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
- SPIRVType *LeftOpType;
- SPIRVType *LeftResType;
+ SPIRVType *LeftOpType = OpType;
+ SPIRVType *LeftResType = BaseType;
if (LeftIsVector) {
LeftOpType =
GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
LeftResType =
GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder);
- } else {
- LeftOpType = OpType;
- LeftResType = BaseType;
}
SPIRVType *RightOpType =
@@ -3212,8 +3209,6 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
Register RightSideIn =
MRI->createVirtualRegister(GR.getRegClass(RightOpType));
- bool Result;
-
// Extract the left half from the SrcReg into LeftSideIn
// accounting for the special case when it only has one element
if (LeftIsVector) {
@@ -3225,14 +3220,16 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
// Per the spec, repeat the vector if only one vec is needed
.addUse(SrcReg);
- for (unsigned J = 0; J < LeftComponentCount; J++) {
+ for (unsigned J = 0; J < LeftComponentCount; J++)
MIB.addImm(J);
- }
- Result = MIB.constrainAllUses(TII, TRI, RBI);
+ if (!MIB.constrainAllUses(TII, TRI, RBI))
+ return false;
+
} else {
- Result = selectOpWithSrcs(LeftSideIn, LeftOpType, I, {SrcReg, ConstIntZero},
- SPIRV::OpVectorExtractDynamic);
+ if (!selectOpWithSrcs(LeftSideIn, LeftOpType, I, {SrcReg, ConstIntZero},
+ SPIRV::OpVectorExtractDynamic))
+ return false;
}
// Extract the right half from the SrcReg into RightSideIn.
@@ -3246,28 +3243,28 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
// Per the spec, repeat the vector if only one vec is needed
.addUse(SrcReg);
- for (unsigned J = LeftComponentCount; J < ComponentCount; J++) {
+ for (unsigned J = LeftComponentCount; J < ComponentCount; J++)
MIB.addImm(J);
- }
- Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+ if (!MIB.constrainAllUses(TII, TRI, RBI))
+ return false;
// Recursively call selectFirstBitSet64 on the 2 halves
Register LeftSideOut =
MRI->createVirtualRegister(GR.getRegClass(LeftResType));
Register RightSideOut =
MRI->createVirtualRegister(GR.getRegClass(RightResType));
- Result =
- Result && selectFirstBitSet64(LeftSideOut, LeftResType, I, LeftSideIn,
- BitSetOpcode, SwapPrimarySide);
- Result =
- Result && selectFirstBitSet64(RightSideOut, RightResType, I, RightSideIn,
- BitSetOpcode, SwapPrimarySide);
+
+ if (!selectFirstBitSet64(LeftSideOut, LeftResType, I, LeftSideIn,
+ BitSetOpcode, SwapPrimarySide))
+ return false;
+ if (!selectFirstBitSet64(RightSideOut, RightResType, I, RightSideIn,
+ BitSetOpcode, SwapPrimarySide))
+ return false;
// Join the two resulting registers back into the return type
// (ie i32x2, i32x2 -> i32x4)
- return Result &&
- selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
+ return selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
SPIRV::OpCompositeConstruct);
}
@@ -3297,13 +3294,15 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder);
Register BitcastReg =
MRI->createVirtualRegister(GR.getRegClass(PostCastType));
- bool Result =
- selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg}, SPIRV::OpBitcast);
+
+ if (!selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg},
+ SPIRV::OpBitcast))
+ return false;
// 2. Find the first set bit from the primary side for all the pieces in #1
Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
- Result = Result && selectFirstBitSet32(FBSReg, PostCastType, I, BitcastReg,
- BitSetOpcode);
+ if (!selectFirstBitSet32(FBSReg, PostCastType, I, BitcastReg, BitSetOpcode))
+ return false;
// 3. Split result vector into high bits and low bits
Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
@@ -3312,12 +3311,12 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
if (IsScalarRes) {
// if scalar do a vector extract
- Result =
- Result && selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero},
- SPIRV::OpVectorExtractDynamic);
- Result =
- Result && selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne},
- SPIRV::OpVectorExtractDynamic);
+ if (!selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero},
+ SPIRV::OpVectorExtractDynamic))
+ return false;
+ if (!selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne},
+ SPIRV::OpVectorExtractDynamic))
+ return false;
} else {
// if vector do a shufflevector
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
@@ -3332,7 +3331,9 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
for (unsigned J = 0; J < ComponentCount * 2; J += 2) {
MIB.addImm(J);
}
- Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+ if (!MIB.constrainAllUses(TII, TRI, RBI))
+ return false;
MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(SPIRV::OpVectorShuffle))
@@ -3346,7 +3347,8 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
for (unsigned J = 1; J < ComponentCount * 2; J += 2) {
MIB.addImm(J);
}
- Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+ if (!MIB.constrainAllUses(TII, TRI, RBI))
+ return false;
}
// 4. Check the result. When primary bits == -1 use secondary, otherwise use
@@ -3376,10 +3378,10 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
AddOp = SPIRV::OpIAddV;
}
- Register PrimaryReg;
- Register SecondaryReg;
- Register PrimaryShiftReg;
- Register SecondaryShiftReg;
+ Register PrimaryReg = HighReg;
+ Register SecondaryReg = LowReg;
+ Register PrimaryShiftReg = Reg32;
+ Register SecondaryShiftReg = Reg0;
// By default the emitted opcodes check for the set bit from the MSB side.
// Setting SwapPrimarySide checks the set bit from the LSB side
@@ -3388,32 +3390,27 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(
SecondaryReg = HighReg;
PrimaryShiftReg = Reg0;
SecondaryShiftReg = Reg32;
- } else {
- PrimaryReg = HighReg;
- SecondaryReg = LowReg;
- PrimaryShiftReg = Reg32;
- SecondaryShiftReg = Reg0;
}
// Check if the primary bits are == -1
Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType));
- Result = Result && selectOpWithSrcs(BReg, BoolType, I,
- {PrimaryReg, NegOneReg}, SPIRV::OpIEqual);
+ if (!selectOpWithSrcs(BReg, BoolType, I, {PrimaryReg, NegOneReg},
+ SPIRV::OpIEqual))
+ return false;
// Select secondary bits if true in BReg, otherwise primary bits
Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Result =
- Result && selectOpWithSrcs(TmpReg, ResType, I,
- {BReg, SecondaryReg, PrimaryReg}, SelectOp);
+ if (!selectOpWithSrcs(TmpReg, ResType, I, {BReg, SecondaryReg, PrimaryReg},
+ SelectOp))
+ return false;
// 5. Add 32 when high bits are used, otherwise 0 for low bits
Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- Result = Result && selectOpWithSrcs(
- ValReg, ResType, I,
- {BReg, SecondaryShiftReg, PrimaryShiftReg}, SelectOp);
+ if (!selectOpWithSrcs(ValReg, ResType, I,
+ {BReg, SecondaryShiftReg, PrimaryShiftReg}, SelectOp))
+ return false;
- return Result &&
- selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp);
+ return selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp);
}
bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
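
The bulk of this final patch is a mechanical cleanup: the accumulated `Result = Result && ...` chains are replaced with early returns, so a failed selection stops emitting further instructions immediately instead of relying on &&-short-circuiting while the surrounding builder code keeps running. A generic sketch of the pattern under illustrative names (stepOne/stepTwo/stepThree stand in for the selectOpWithSrcs and constrainAllUses calls):

// Hypothetical selection steps; each returns false on failure.
static bool stepOne() { return true; }
static bool stepTwo() { return true; }
static bool stepThree() { return true; }

// Before: a failure is carried along in Result; later checked calls are only
// skipped via &&-short-circuiting, and unchecked code between them still runs.
static bool selectAllStepsBefore() {
  bool Result = stepOne();
  Result = Result && stepTwo();
  Result = Result && stepThree();
  return Result;
}

// After: bail out as soon as a step fails.
static bool selectAllStepsAfter() {
  if (!stepOne())
    return false;
  if (!stepTwo())
    return false;
  return stepThree();
}
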