[llvm] [SPIR-V] Add __spirv_ wrappers to Non-Uniform, Atomic, Convert Instructions (PR #96790)
Vyacheslav Levytskyy via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 09:19:30 PDT 2024
https://github.com/VyacheslavLevytskyy updated https://github.com/llvm/llvm-project/pull/96790
From 1b2c123cae021fd54fb95ceac9be9aa14fabf3b1 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 26 Jun 2024 09:11:23 -0700
Subject: [PATCH 1/7] add __spirv_ wrappers to Non-Uniform Instructions
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 14 ++++++++++++++
.../transcoding/sub_group_shuffle_relative.ll | 8 ++++++++
2 files changed, 22 insertions(+)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index fb88332ab8902..61ef29dac2424 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -782,27 +782,41 @@ defm : DemangledGroupBuiltin<"group_broadcast_first", OnlySub, OpGroupNonUniform
// cl_khr_subgroup_non_uniform_vote
defm : DemangledGroupBuiltin<"group_elect", OnlySub, OpGroupNonUniformElect>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformElect", 1, 1, OpGroupNonUniformElect>;
defm : DemangledGroupBuiltin<"group_non_uniform_all", OnlySub, OpGroupNonUniformAll>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformAll", 2, 2, OpGroupNonUniformAll>;
defm : DemangledGroupBuiltin<"group_non_uniform_any", OnlySub, OpGroupNonUniformAny>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformAny", 2, 2, OpGroupNonUniformAny>;
defm : DemangledGroupBuiltin<"group_non_uniform_all_equal", OnlySub, OpGroupNonUniformAllEqual>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformAllEqual", 2, 2, OpGroupNonUniformAllEqual>;
// cl_khr_subgroup_ballot
defm : DemangledGroupBuiltin<"group_ballot", OnlySub, OpGroupNonUniformBallot>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBallot", 2, 2, OpGroupNonUniformBallot>;
defm : DemangledGroupBuiltin<"group_inverse_ballot", OnlySub, OpGroupNonUniformInverseBallot>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformInverseBallot", 2, 2, OpGroupNonUniformInverseBallot>;
defm : DemangledGroupBuiltin<"group_ballot_bit_extract", OnlySub, OpGroupNonUniformBallotBitExtract>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBallotBitExtract", 3, 3, OpGroupNonUniformBallotBitExtract>;
defm : DemangledGroupBuiltin<"group_ballot_bit_count", OnlySub, OpGroupNonUniformBallotBitCount>;
defm : DemangledGroupBuiltin<"group_ballot_inclusive_scan", OnlySub, OpGroupNonUniformBallotBitCount>;
defm : DemangledGroupBuiltin<"group_ballot_exclusive_scan", OnlySub, OpGroupNonUniformBallotBitCount>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBallotBitCount", 3, 3, OpGroupNonUniformBallotBitCount>;
defm : DemangledGroupBuiltin<"group_ballot_find_lsb", OnlySub, OpGroupNonUniformBallotFindLSB>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBallotFindLSB", 2, 2, OpGroupNonUniformBallotFindLSB>;
defm : DemangledGroupBuiltin<"group_ballot_find_msb", OnlySub, OpGroupNonUniformBallotFindMSB>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBallotFindMSB", 2, 2, OpGroupNonUniformBallotFindMSB>;
// cl_khr_subgroup_shuffle
defm : DemangledGroupBuiltin<"group_shuffle", OnlySub, OpGroupNonUniformShuffle>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformShuffle", 3, 3, OpGroupNonUniformShuffle>;
defm : DemangledGroupBuiltin<"group_shuffle_xor", OnlySub, OpGroupNonUniformShuffleXor>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformShuffleXor", 3, 3, OpGroupNonUniformShuffleXor>;
// cl_khr_subgroup_shuffle_relative
defm : DemangledGroupBuiltin<"group_shuffle_up", OnlySub, OpGroupNonUniformShuffleUp>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformShuffleUp", 3, 3, OpGroupNonUniformShuffleUp>;
defm : DemangledGroupBuiltin<"group_shuffle_down", OnlySub, OpGroupNonUniformShuffleDown>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformShuffleDown", 3, 3, OpGroupNonUniformShuffleDown>;
defm : DemangledGroupBuiltin<"group_iadd", WorkOrSub, OpGroupIAdd>;
defm : DemangledGroupBuiltin<"group_reduce_adds", WorkOrSub, OpGroupIAdd>;
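
These wrapper records mirror the DemangledNativeBuiltin records used elsewhere in the file: the two integer parameters appear to give the minimum and maximum number of arguments a matching call may carry, i.e. the scope plus the instruction's operands (one for Elect, two for the votes and ballots, three for the shuffles). A minimal sketch of the call shapes this enables (kernel name hypothetical):

define spir_kernel void @wrapper_shapes() {
  ; i32 3 encodes the Subgroup scope
  %e = call spir_func i1 @__spirv_GroupNonUniformElect(i32 3)        ; scope only
  %a = call spir_func i1 @__spirv_GroupNonUniformAll(i32 3, i1 true) ; scope + predicate
  ret void
}

declare spir_func i1 @__spirv_GroupNonUniformElect(i32)
declare spir_func i1 @__spirv_GroupNonUniformAll(i32, i1)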
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle_relative.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle_relative.ll
index 3ad2c2d87549c..af2bc9b1c77cf 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle_relative.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle_relative.ll
@@ -102,13 +102,17 @@
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
; CHECK-SPIRV: OpFunctionEnd
define dso_local spir_kernel void @testShuffleRelativeChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func signext i8 @_Z20sub_group_shuffle_upcj(i8 signext 0, i32 0)
+ %w2 = tail call spir_func i8 @__spirv_GroupNonUniformShuffleUp(i32 3, i8 signext 0, i32 0)
store i8 %2, i8 addrspace(1)* %0, align 1
%3 = tail call spir_func signext i8 @_Z22sub_group_shuffle_downcj(i8 signext 0, i32 0)
+ %w3 = tail call spir_func i8 @__spirv_GroupNonUniformShuffleDown(i32 3, i8 signext 0, i32 0)
%4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
store i8 %3, i8 addrspace(1)* %4, align 1
ret void
@@ -118,6 +122,10 @@ declare dso_local spir_func signext i8 @_Z20sub_group_shuffle_upcj(i8 signext, i
declare dso_local spir_func signext i8 @_Z22sub_group_shuffle_downcj(i8 signext, i32) local_unnamed_addr
+declare dso_local spir_func i8 @__spirv_GroupNonUniformShuffleUp(i32, i8, i32)
+
+declare dso_local spir_func i8 @__spirv_GroupNonUniformShuffleDown(i32, i8, i32)
+
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleUp %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffleDown %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
From 9c901375473f6ffe4aae436ecbfea8f203458907 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 26 Jun 2024 11:28:52 -0700
Subject: [PATCH 2/7] add a test case to do
---
.../CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll
index 1073473a224df..f0433e65a38bf 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll
@@ -86,15 +86,18 @@
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformElect %[[#bool]] %[[#ScopeSubgroup]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformElect %[[#bool]] %[[#ScopeSubgroup]]
; CHECK-SPIRV: OpFunctionEnd
define dso_local spir_kernel void @testSubGroupElect(i32 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func i32 @_Z15sub_group_electv()
+ %r2 = tail call spir_func i32 @__spirv_GroupNonUniformElect(i32 3)
store i32 %2, i32 addrspace(1)* %0, align 4
ret void
}
declare dso_local spir_func i32 @_Z15sub_group_electv() local_unnamed_addr
+declare dso_local spir_func i32 @__spirv_GroupNonUniformElect(i32)
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAll %[[#bool]] %[[#ScopeSubgroup]] %[[#false]]
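
Note that patch 3 below corrects the wrapper's return type to i1, matching the OpTypeBool result of OpGroupNonUniformElect. With that fix, a caller that wants to store the result has to widen it first; a minimal sketch (kernel name hypothetical):

define spir_kernel void @elect_store(ptr addrspace(1) %out) {
  %e = call spir_func i1 @__spirv_GroupNonUniformElect(i32 3) ; i32 3 = Subgroup scope
  %z = zext i1 %e to i32
  store i32 %z, ptr addrspace(1) %out, align 4
  ret void
}

declare spir_func i1 @__spirv_GroupNonUniformElect(i32)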
From 67495f7e9f9108c1863760e883f6148d384938a2 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 3 Jul 2024 01:39:01 -0700
Subject: [PATCH 3/7] harden tests
---
.../SPIRV/transcoding/sub_group_ballot.ll | 22 +++++++++++++++++++
.../transcoding/sub_group_non_uniform_vote.ll | 15 +++++++++++--
.../SPIRV/transcoding/sub_group_shuffle.ll | 4 ++++
3 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_ballot.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_ballot.ll
index 6cc9e0f332928..c579859a3f531 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_ballot.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_ballot.ll
@@ -844,55 +844,77 @@ declare dso_local spir_func double @_Z25sub_group_broadcast_firstd(double) local
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#ballot:]] = OpGroupNonUniformBallot %[[#int4]] %[[#ScopeSubgroup]] %[[#false]]
+; CHECK-SPIRV: %[[#ballot2:]] = OpGroupNonUniformBallot %[[#int4]] %[[#ScopeSubgroup]] %[[#false]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformInverseBallot %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformInverseBallot %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitExtract %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot]] %[[#int_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitExtract %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot2]] %[[#int_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#ballot2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#ballot2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#ballot2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindLSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindLSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindMSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindMSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot2]]
; CHECK-SPIRV: OpFunctionEnd
define dso_local spir_kernel void @testBallotOperations(i32 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func <4 x i32> @_Z16sub_group_balloti(i32 0)
+ %r2 = tail call spir_func <4 x i32> @__spirv_GroupNonUniformBallot(i32 3, i1 false)
%3 = tail call spir_func i32 @_Z24sub_group_inverse_ballotDv4_j(<4 x i32> %2)
+ %r3 = tail call spir_func i1 @__spirv_GroupNonUniformInverseBallot(i32 3, <4 x i32> %r2)
store i32 %3, i32 addrspace(1)* %0, align 4
%4 = tail call spir_func i32 @_Z28sub_group_ballot_bit_extractDv4_jj(<4 x i32> %2, i32 0)
+ %r4 = tail call spir_func i1 @__spirv_GroupNonUniformBallotBitExtract(i32 3, <4 x i32> %r2, i32 0)
%5 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
store i32 %4, i32 addrspace(1)* %5, align 4
%6 = tail call spir_func i32 @_Z26sub_group_ballot_bit_countDv4_j(<4 x i32> %2)
+ %r6 = tail call spir_func i32 @__spirv_GroupNonUniformBallotBitCount(i32 3, i32 0, <4 x i32> %r2)
%7 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
store i32 %6, i32 addrspace(1)* %7, align 4
%8 = tail call spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32> %2)
+ %r8 = tail call spir_func i32 @__spirv_GroupNonUniformBallotBitCount(i32 3, i32 1, <4 x i32> %r2)
%9 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
store i32 %8, i32 addrspace(1)* %9, align 4
%10 = tail call spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32> %2)
+ %r10 = tail call spir_func i32 @__spirv_GroupNonUniformBallotBitCount(i32 3, i32 2, <4 x i32> %r2)
%11 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4
store i32 %10, i32 addrspace(1)* %11, align 4
%12 = tail call spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32> %2)
+ %r12 = tail call spir_func i32 @__spirv_GroupNonUniformBallotFindLSB(i32 3, <4 x i32> %r2)
%13 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5
store i32 %12, i32 addrspace(1)* %13, align 4
%14 = tail call spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32> %2)
+ %r14 = tail call spir_func i32 @__spirv_GroupNonUniformBallotFindMSB(i32 3, <4 x i32> %r2)
%15 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6
store i32 %14, i32 addrspace(1)* %15, align 4
ret void
}
declare dso_local spir_func <4 x i32> @_Z16sub_group_balloti(i32) local_unnamed_addr
+declare dso_local spir_func <4 x i32> @__spirv_GroupNonUniformBallot(i32, i1)
declare dso_local spir_func i32 @_Z24sub_group_inverse_ballotDv4_j(<4 x i32>) local_unnamed_addr
+declare dso_local spir_func i1 @__spirv_GroupNonUniformInverseBallot(i32, <4 x i32>)
declare dso_local spir_func i32 @_Z28sub_group_ballot_bit_extractDv4_jj(<4 x i32>, i32) local_unnamed_addr
+declare dso_local spir_func i1 @__spirv_GroupNonUniformBallotBitExtract(i32, <4 x i32>, i32) local_unnamed_addr
declare dso_local spir_func i32 @_Z26sub_group_ballot_bit_countDv4_j(<4 x i32>) local_unnamed_addr
+declare dso_local spir_func i32 @__spirv_GroupNonUniformBallotBitCount(i32, i32, <4 x i32>)
declare dso_local spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32>) local_unnamed_addr
declare dso_local spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32>) local_unnamed_addr
declare dso_local spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32>) local_unnamed_addr
+declare dso_local spir_func i32 @__spirv_GroupNonUniformBallotFindLSB(i32, <4 x i32>)
declare dso_local spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32>) local_unnamed_addr
+declare dso_local spir_func i32 @__spirv_GroupNonUniformBallotFindMSB(i32, <4 x i32>)
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#eqMask]]
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll
index f0433e65a38bf..183f1d2eeef59 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_vote.ll
@@ -75,8 +75,10 @@
; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64
; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]]
+; CHECK-SPIRV-DAG: %[[#true:]] = OpConstantTrue %[[#bool]]
; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3
; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0
+; CHECK-SPIRV-DAG: %[[#char_10:]] = OpConstant %[[#char]] 10
; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0
; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0
; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]]
@@ -91,40 +93,47 @@
define dso_local spir_kernel void @testSubGroupElect(i32 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func i32 @_Z15sub_group_electv()
- %r2 = tail call spir_func i32 @__spirv_GroupNonUniformElect(i32 3)
+ %r2 = tail call spir_func i1 @__spirv_GroupNonUniformElect(i32 3)
store i32 %2, i32 addrspace(1)* %0, align 4
ret void
}
declare dso_local spir_func i32 @_Z15sub_group_electv() local_unnamed_addr
-declare dso_local spir_func i32 @__spirv_GroupNonUniformElect(i32)
+declare dso_local spir_func i1 @__spirv_GroupNonUniformElect(i32)
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAll %[[#bool]] %[[#ScopeSubgroup]] %[[#false]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAll %[[#bool]] %[[#ScopeSubgroup]] %[[#true]]
; CHECK-SPIRV: OpFunctionEnd
define dso_local spir_kernel void @testSubGroupNonUniformAll(i32 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func i32 @_Z25sub_group_non_uniform_alli(i32 0)
+ %r2 = tail call spir_func i1 @__spirv_GroupNonUniformAll(i32 3, i1 true)
store i32 %2, i32 addrspace(1)* %0, align 4
ret void
}
declare dso_local spir_func i32 @_Z25sub_group_non_uniform_alli(i32) local_unnamed_addr
+declare dso_local spir_func i1 @__spirv_GroupNonUniformAll(i32, i1)
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAny %[[#bool]] %[[#ScopeSubgroup]] %[[#false]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAny %[[#bool]] %[[#ScopeSubgroup]] %[[#true]]
; CHECK-SPIRV: OpFunctionEnd
define dso_local spir_kernel void @testSubGroupNonUniformAny(i32 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func i32 @_Z25sub_group_non_uniform_anyi(i32 0)
+ %r2 = tail call spir_func i1 @__spirv_GroupNonUniformAny(i32 3, i1 true)
store i32 %2, i32 addrspace(1)* %0, align 4
ret void
}
declare dso_local spir_func i32 @_Z25sub_group_non_uniform_anyi(i32) local_unnamed_addr
+declare dso_local spir_func i1 @__spirv_GroupNonUniformAny(i32, i1)
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#char_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#char_10]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#char_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#short_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformAllEqual %[[#bool]] %[[#ScopeSubgroup]] %[[#short_0]]
@@ -139,6 +148,7 @@ declare dso_local spir_func i32 @_Z25sub_group_non_uniform_anyi(i32) local_unnam
define dso_local spir_kernel void @testSubGroupNonUniformAllEqual(i32 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalc(i8 signext 0)
+ %r2 = tail call spir_func i1 @__spirv_GroupNonUniformAllEqual(i32 3, i8 signext 10)
store i32 %2, i32 addrspace(1)* %0, align 4
%3 = tail call spir_func i32 @_Z31sub_group_non_uniform_all_equalh(i8 zeroext 0)
store i32 %3, i32 addrspace(1)* %0, align 4
@@ -164,6 +174,7 @@ define dso_local spir_kernel void @testSubGroupNonUniformAllEqual(i32 addrspace(
}
declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalc(i8 signext) local_unnamed_addr
+declare dso_local spir_func i1 @__spirv_GroupNonUniformAllEqual(i32, i8 signext)
declare dso_local spir_func i32 @_Z31sub_group_non_uniform_all_equalh(i8 zeroext) local_unnamed_addr
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle.ll
index 9ed5c78bd0ede..370e63e9b5c91 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_shuffle.ll
@@ -107,16 +107,20 @@
define dso_local spir_kernel void @testShuffleChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func signext i8 @_Z17sub_group_shufflecj(i8 signext 0, i32 0)
+ %r2 = tail call spir_func signext i8 @__spirv_GroupNonUniformShuffle(i32 3, i8 signext 0, i32 0)
store i8 %2, i8 addrspace(1)* %0, align 1
%3 = tail call spir_func signext i8 @_Z21sub_group_shuffle_xorcj(i8 signext 0, i32 0)
+ %r3 = tail call spir_func signext i8 @__spirv_GroupNonUniformShuffleXor(i32 3, i8 signext 0, i32 0)
%4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
store i8 %3, i8 addrspace(1)* %4, align 1
ret void
}
declare dso_local spir_func signext i8 @_Z17sub_group_shufflecj(i8 signext, i32) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformShuffle(i32, i8 signext, i32)
declare dso_local spir_func signext i8 @_Z21sub_group_shuffle_xorcj(i8 signext, i32) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformShuffleXor(i32, i8 signext, i32)
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformShuffle %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
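
The shuffle wrappers take (scope, value, id), and nothing requires the lane id to be a constant; a sketch with a dynamic lane (function name hypothetical):

define spir_func i8 @shuffle_from(i8 %v, i32 %lane) {
  ; scope Subgroup (3); %lane selects the invocation to read %v from
  %r = call spir_func i8 @__spirv_GroupNonUniformShuffle(i32 3, i8 %v, i32 %lane)
  ret i8 %r
}

declare spir_func i8 @__spirv_GroupNonUniformShuffle(i32, i8, i32)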
From b5de7335ea103080ed6f56169a0252bca660a9ab Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 3 Jul 2024 06:30:27 -0700
Subject: [PATCH 4/7] add more relevant instructions and add test cases
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 20 +++---
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 17 ++++-
.../sub_group_non_uniform_arithmetic.ll | 69 +++++++++++++++----
3 files changed, 83 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index dfec10bec3f9e..3a907366538ef 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1066,15 +1066,17 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call,
Register ScopeReg = Call->Arguments[0];
if (!MRI->getRegClassOrNull(ScopeReg))
MRI->setRegClass(ScopeReg, &SPIRV::IDRegClass);
- Register ValueReg = Call->Arguments[2];
- if (!MRI->getRegClassOrNull(ValueReg))
- MRI->setRegClass(ValueReg, &SPIRV::IDRegClass);
- MIRBuilder.buildInstr(GroupBuiltin->Opcode)
- .addDef(Call->ReturnRegister)
- .addUse(GR->getSPIRVTypeID(Call->ReturnType))
- .addUse(ScopeReg)
- .addImm(GrpOp)
- .addUse(ValueReg);
+ auto MIB = MIRBuilder.buildInstr(GroupBuiltin->Opcode)
+ .addDef(Call->ReturnRegister)
+ .addUse(GR->getSPIRVTypeID(Call->ReturnType))
+ .addUse(ScopeReg)
+ .addImm(GrpOp);
+ for (unsigned i = 2; i < Call->Arguments.size(); ++i) {
+ Register ArgReg = Call->Arguments[i];
+ if (!MRI->getRegClassOrNull(ArgReg))
+ MRI->setRegClass(ArgReg, &SPIRV::IDRegClass);
+ MIB.addUse(ArgReg);
+ }
return true;
}
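
With this change, instead of a single fixed value operand, every argument after the scope is forwarded behind the group-operation immediate, which is what makes the clustered forms with a trailing cluster-size operand expressible. A sketch of a call this now supports, assuming the standard SPIR-V group-operation encoding (3 = ClusteredReduce):

define spir_kernel void @clustered(ptr addrspace(1) %out) {
  ; scope Subgroup (3), group operation ClusteredReduce (3), value, cluster size 4
  %r = call spir_func i32 @__spirv_GroupNonUniformIAdd(i32 3, i32 3, i32 1, i32 4)
  store i32 %r, ptr addrspace(1) %out, align 4
  ret void
}

declare spir_func i32 @__spirv_GroupNonUniformIAdd(i32, i32, i32, i32)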
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 61ef29dac2424..f8ede6ab244e1 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -879,6 +879,7 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_addu", WorkOrSub,
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_adds", WorkOrSub, OpGroupNonUniformIAdd>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_addu", WorkOrSub, OpGroupNonUniformIAdd>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_adds", WorkOrSub, OpGroupNonUniformIAdd>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformIAdd", 3, 4, OpGroupNonUniformIAdd>;
defm : DemangledGroupBuiltin<"group_non_uniform_fadd", WorkOrSub, OpGroupNonUniformFAdd>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_addf", WorkOrSub, OpGroupNonUniformFAdd>;
@@ -893,6 +894,7 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_addd", WorkOrSub,
defm : DemangledGroupBuiltin<"group_clustered_reduce_addf", WorkOrSub, OpGroupNonUniformFAdd>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_addh", WorkOrSub, OpGroupNonUniformFAdd>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_addd", WorkOrSub, OpGroupNonUniformFAdd>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformFAdd", 3, 4, OpGroupNonUniformFAdd>;
defm : DemangledGroupBuiltin<"group_non_uniform_imul", WorkOrSub, OpGroupNonUniformIMul>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_mulu", WorkOrSub, OpGroupNonUniformIMul>;
@@ -903,6 +905,7 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_mulu", WorkOrSub,
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_muls", WorkOrSub, OpGroupNonUniformIMul>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_mulu", WorkOrSub, OpGroupNonUniformIMul>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_muls", WorkOrSub, OpGroupNonUniformIMul>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformIMul", 3, 4, OpGroupNonUniformIMul>;
defm : DemangledGroupBuiltin<"group_non_uniform_fmul", WorkOrSub, OpGroupNonUniformFMul>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_mulf", WorkOrSub, OpGroupNonUniformFMul>;
@@ -917,19 +920,21 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_muld", WorkOrSub,
defm : DemangledGroupBuiltin<"group_clustered_reduce_mulf", WorkOrSub, OpGroupNonUniformFMul>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_mulh", WorkOrSub, OpGroupNonUniformFMul>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_muld", WorkOrSub, OpGroupNonUniformFMul>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformFMul", 3, 4, OpGroupNonUniformFMul>;
defm : DemangledGroupBuiltin<"group_non_uniform_smin", WorkOrSub, OpGroupNonUniformSMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_mins", WorkOrSub, OpGroupNonUniformSMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_mins", WorkOrSub, OpGroupNonUniformSMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_mins", WorkOrSub, OpGroupNonUniformSMin>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_mins", WorkOrSub, OpGroupNonUniformSMin>;
-
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformSMin", 3, 4, OpGroupNonUniformSMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_umin", WorkOrSub, OpGroupNonUniformUMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_minu", WorkOrSub, OpGroupNonUniformUMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_minu", WorkOrSub, OpGroupNonUniformUMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_minu", WorkOrSub, OpGroupNonUniformUMin>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_minu", WorkOrSub, OpGroupNonUniformUMin>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformUMin", 3, 4, OpGroupNonUniformUMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_fmin", WorkOrSub, OpGroupNonUniformFMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_minf", WorkOrSub, OpGroupNonUniformFMin>;
@@ -944,18 +949,21 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_mind", WorkOrSub,
defm : DemangledGroupBuiltin<"group_clustered_reduce_minf", WorkOrSub, OpGroupNonUniformFMin>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_minh", WorkOrSub, OpGroupNonUniformFMin>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_mind", WorkOrSub, OpGroupNonUniformFMin>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformFMin", 3, 4, OpGroupNonUniformFMin>;
defm : DemangledGroupBuiltin<"group_non_uniform_smax", WorkOrSub, OpGroupNonUniformSMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_maxs", WorkOrSub, OpGroupNonUniformSMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_maxs", WorkOrSub, OpGroupNonUniformSMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_maxs", WorkOrSub, OpGroupNonUniformSMax>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_maxs", WorkOrSub, OpGroupNonUniformSMax>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformSMax", 3, 4, OpGroupNonUniformSMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_umax", WorkOrSub, OpGroupNonUniformUMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_maxu", WorkOrSub, OpGroupNonUniformUMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_maxu", WorkOrSub, OpGroupNonUniformUMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_maxu", WorkOrSub, OpGroupNonUniformUMax>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_maxu", WorkOrSub, OpGroupNonUniformUMax>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformUMax", 3, 4, OpGroupNonUniformUMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_fmax", WorkOrSub, OpGroupNonUniformFMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_maxf", WorkOrSub, OpGroupNonUniformFMax>;
@@ -970,6 +978,7 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_maxd", WorkOrSub,
defm : DemangledGroupBuiltin<"group_clustered_reduce_maxf", WorkOrSub, OpGroupNonUniformFMax>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_maxh", WorkOrSub, OpGroupNonUniformFMax>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_maxd", WorkOrSub, OpGroupNonUniformFMax>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformFMax", 3, 4, OpGroupNonUniformFMax>;
defm : DemangledGroupBuiltin<"group_non_uniform_iand", WorkOrSub, OpGroupNonUniformBitwiseAnd>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_andu", WorkOrSub, OpGroupNonUniformBitwiseAnd>;
@@ -980,6 +989,7 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_andu", WorkOrSub,
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_ands", WorkOrSub, OpGroupNonUniformBitwiseAnd>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_andu", WorkOrSub, OpGroupNonUniformBitwiseAnd>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_ands", WorkOrSub, OpGroupNonUniformBitwiseAnd>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBitwiseAnd", 3, 4, OpGroupNonUniformBitwiseAnd>;
defm : DemangledGroupBuiltin<"group_non_uniform_ior", WorkOrSub, OpGroupNonUniformBitwiseOr>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_oru", WorkOrSub, OpGroupNonUniformBitwiseOr>;
@@ -990,6 +1000,7 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_oru", WorkOrSub,
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_ors", WorkOrSub, OpGroupNonUniformBitwiseOr>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_oru", WorkOrSub, OpGroupNonUniformBitwiseOr>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_ors", WorkOrSub, OpGroupNonUniformBitwiseOr>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBitwiseOr", 3, 4, OpGroupNonUniformBitwiseOr>;
defm : DemangledGroupBuiltin<"group_non_uniform_ixor", WorkOrSub, OpGroupNonUniformBitwiseXor>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_xoru", WorkOrSub, OpGroupNonUniformBitwiseXor>;
@@ -1000,24 +1011,28 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_xoru", WorkOrSub,
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_xors", WorkOrSub, OpGroupNonUniformBitwiseXor>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_xoru", WorkOrSub, OpGroupNonUniformBitwiseXor>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_xors", WorkOrSub, OpGroupNonUniformBitwiseXor>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBitwiseXor", 3, 4, OpGroupNonUniformBitwiseXor>;
defm : DemangledGroupBuiltin<"group_non_uniform_logical_iand", WorkOrSub, OpGroupNonUniformLogicalAnd>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_logical_ands", WorkOrSub, OpGroupNonUniformLogicalAnd>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_logical_ands", WorkOrSub, OpGroupNonUniformLogicalAnd>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_logical_ands", WorkOrSub, OpGroupNonUniformLogicalAnd>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_logical_and", WorkOrSub, OpGroupNonUniformLogicalAnd>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformLogicalAnd", 3, 4, OpGroupNonUniformLogicalAnd>;
defm : DemangledGroupBuiltin<"group_non_uniform_logical_ior", WorkOrSub, OpGroupNonUniformLogicalOr>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_logical_ors", WorkOrSub, OpGroupNonUniformLogicalOr>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_logical_ors", WorkOrSub, OpGroupNonUniformLogicalOr>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_logical_ors", WorkOrSub, OpGroupNonUniformLogicalOr>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_logical_or", WorkOrSub, OpGroupNonUniformLogicalOr>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformLogicalOr", 3, 4, OpGroupNonUniformLogicalOr>;
defm : DemangledGroupBuiltin<"group_non_uniform_logical_ixor", WorkOrSub, OpGroupNonUniformLogicalXor>;
defm : DemangledGroupBuiltin<"group_non_uniform_reduce_logical_xors", WorkOrSub, OpGroupNonUniformLogicalXor>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_logical_xors", WorkOrSub, OpGroupNonUniformLogicalXor>;
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_logical_xors", WorkOrSub, OpGroupNonUniformLogicalXor>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_logical_xor", WorkOrSub, OpGroupNonUniformLogicalXor>;
+defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformLogicalXor", 3, 4, OpGroupNonUniformLogicalXor>;
// cl_khr_subgroup_rotate / SPV_KHR_subgroup_rotate
defm : DemangledGroupBuiltin<"group_rotate", OnlySub, OpGroupNonUniformRotateKHR>;
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_arithmetic.ll b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_arithmetic.ll
index 8f4910ff512f8..adf73fe153dea 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_arithmetic.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/sub_group_non_uniform_arithmetic.ll
@@ -331,8 +331,10 @@
; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]]
; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3
; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0
+; CHECK-SPIRV-DAG: %[[#char_10:]] = OpConstant %[[#char]] 10
; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0
; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0
+; CHECK-SPIRV-DAG: %[[#int_32:]] = OpConstant %[[#int]] 32
; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]]
; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0
; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0
@@ -340,9 +342,13 @@
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_10]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_10]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_10]] %[[#int_32]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_10]] %[[#int_32]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
@@ -355,14 +361,18 @@
define dso_local spir_kernel void @testNonUniformArithmeticChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_addc(i8 signext 0)
+ %r2 = tail call spir_func signext i8 @__spirv_GroupNonUniformIAdd(i32 3, i32 0, i8 signext 10)
store i8 %2, i8 addrspace(1)* %0, align 1
%3 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_mulc(i8 signext 0)
+ %r3 = tail call spir_func signext i8 @__spirv_GroupNonUniformIMul(i32 3, i32 1, i8 signext 10)
%4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
store i8 %3, i8 addrspace(1)* %4, align 1
%5 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_minc(i8 signext 0)
+ %r5 = tail call spir_func signext i8 @__spirv_GroupNonUniformSMin(i32 3, i32 0, i8 signext 10, i32 32)
%6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
store i8 %5, i8 addrspace(1)* %6, align 1
%7 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_maxc(i8 signext 0)
+ %r7 = tail call spir_func signext i8 @__spirv_GroupNonUniformSMax(i32 3, i32 0, i8 signext 10, i32 32)
%8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3
store i8 %7, i8 addrspace(1)* %8, align 1
%9 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_addc(i8 signext 0)
@@ -393,12 +403,16 @@ define dso_local spir_kernel void @testNonUniformArithmeticChar(i8 addrspace(1)*
}
declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_addc(i8 signext) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformIAdd(i32, i32, i8)
declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_mulc(i8 signext) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformIMul(i32, i32, i8)
declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_minc(i8 signext) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformSMin(i32, i32, i8, i32)
declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_maxc(i8 signext) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformSMax(i32, i32, i8, i32)
declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_addc(i8 signext) local_unnamed_addr
@@ -576,7 +590,9 @@ declare dso_local spir_func signext i16 @_Z40sub_group_non_uniform_scan_exclusiv
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] %[[#int_32]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]]
+; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#short]] %[[#ScopeSubgroup]] Reduce %[[#short_0]] %[[#int_32]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] InclusiveScan %[[#short_0]]
@@ -594,9 +610,11 @@ define dso_local spir_kernel void @testNonUniformArithmeticUShort(i16 addrspace(
%4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
store i16 %3, i16 addrspace(1)* %4, align 2
%5 = tail call spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_mint(i16 zeroext 0)
+ %r5 = tail call spir_func zeroext i16 @__spirv_GroupNonUniformUMin(i32 3, i32 0, i16 signext 0, i32 32)
%6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
store i16 %5, i16 addrspace(1)* %6, align 2
%7 = tail call spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_maxt(i16 zeroext 0)
+ %r7 = tail call spir_func zeroext i16 @__spirv_GroupNonUniformUMax(i32 3, i32 0, i16 signext 0, i32 32)
%8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3
store i16 %7, i16 addrspace(1)* %8, align 2
%9 = tail call spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_addt(i16 zeroext 0)
@@ -631,8 +649,10 @@ declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_addt(i
declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_mult(i16 zeroext) local_unnamed_addr
declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_mint(i16 zeroext) local_unnamed_addr
+declare dso_local spir_func zeroext i16 @__spirv_GroupNonUniformUMin(i32, i32, i16 signext, i32)
declare dso_local spir_func zeroext i16 @_Z32sub_group_non_uniform_reduce_maxt(i16 zeroext) local_unnamed_addr
+declare dso_local spir_func zeroext i16 @__spirv_GroupNonUniformUMax(i32, i32, i16 signext, i32)
declare dso_local spir_func zeroext i16 @_Z40sub_group_non_uniform_scan_inclusive_addt(i16 zeroext) local_unnamed_addr
@@ -963,10 +983,10 @@ declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_minm(i
declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_maxm(i64) local_unnamed_addr
; CHECK-SPIRV: OpFunction
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformFMax %[[#float]] %[[#ScopeSubgroup]] Reduce %[[#float_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] InclusiveScan %[[#float_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] InclusiveScan %[[#float_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] InclusiveScan %[[#float_0]]
@@ -979,14 +999,18 @@ declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_maxm(i
define dso_local spir_kernel void @testNonUniformArithmeticFloat(float addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func float @_Z32sub_group_non_uniform_reduce_addf(float 0.000000e+00)
+ %r2 = tail call spir_func float @__spirv_GroupNonUniformFAdd(i32 3, i32 0, float 0.000000e+00)
store float %2, float addrspace(1)* %0, align 4
%3 = tail call spir_func float @_Z32sub_group_non_uniform_reduce_mulf(float 0.000000e+00)
+ %r3 = tail call spir_func float @__spirv_GroupNonUniformFMul(i32 3, i32 0, float 0.000000e+00)
%4 = getelementptr inbounds float, float addrspace(1)* %0, i64 1
store float %3, float addrspace(1)* %4, align 4
%5 = tail call spir_func float @_Z32sub_group_non_uniform_reduce_minf(float 0.000000e+00)
+ %r5 = tail call spir_func float @__spirv_GroupNonUniformFMin(i32 3, i32 0, float 0.000000e+00)
%6 = getelementptr inbounds float, float addrspace(1)* %0, i64 2
store float %5, float addrspace(1)* %6, align 4
%7 = tail call spir_func float @_Z32sub_group_non_uniform_reduce_maxf(float 0.000000e+00)
+ %r7 = tail call spir_func float @__spirv_GroupNonUniformFMax(i32 3, i32 0, float 0.000000e+00)
%8 = getelementptr inbounds float, float addrspace(1)* %0, i64 3
store float %7, float addrspace(1)* %8, align 4
%9 = tail call spir_func float @_Z40sub_group_non_uniform_scan_inclusive_addf(float 0.000000e+00)
@@ -1017,12 +1041,16 @@ define dso_local spir_kernel void @testNonUniformArithmeticFloat(float addrspace
}
declare dso_local spir_func float @_Z32sub_group_non_uniform_reduce_addf(float) local_unnamed_addr
+declare dso_local spir_func float @__spirv_GroupNonUniformFAdd(i32, i32, float)
declare dso_local spir_func float @_Z32sub_group_non_uniform_reduce_mulf(float) local_unnamed_addr
+declare dso_local spir_func float @__spirv_GroupNonUniformFMul(i32, i32, float)
declare dso_local spir_func float @_Z32sub_group_non_uniform_reduce_minf(float) local_unnamed_addr
+declare dso_local spir_func float @__spirv_GroupNonUniformFMin(i32, i32, float)
declare dso_local spir_func float @_Z32sub_group_non_uniform_reduce_maxf(float) local_unnamed_addr
+declare dso_local spir_func float @__spirv_GroupNonUniformFMax(i32, i32, float)
declare dso_local spir_func float @_Z40sub_group_non_uniform_scan_inclusive_addf(float) local_unnamed_addr
@@ -1197,12 +1225,12 @@ declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_exclusive_min
declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_exclusive_maxd(double) local_unnamed_addr
; CHECK-SPIRV: OpFunction
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] Reduce %[[#char_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] InclusiveScan %[[#char_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ExclusiveScan %[[#char_0]]
@@ -1210,20 +1238,26 @@ declare dso_local spir_func double @_Z40sub_group_non_uniform_scan_exclusive_max
define dso_local spir_kernel void @testNonUniformBitwiseChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_andc(i8 signext 0)
+ %r2 = tail call spir_func signext i8 @__spirv_GroupNonUniformBitwiseAnd(i32 3, i32 0, i8 signext 0)
store i8 %2, i8 addrspace(1)* %0, align 1
%3 = tail call spir_func signext i8 @_Z31sub_group_non_uniform_reduce_orc(i8 signext 0)
+ %r3 = tail call spir_func signext i8 @__spirv_GroupNonUniformBitwiseOr(i32 3, i32 0, i8 signext 0)
%4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
store i8 %3, i8 addrspace(1)* %4, align 1
%5 = tail call spir_func signext i8 @_Z32sub_group_non_uniform_reduce_xorc(i8 signext 0)
+ %r5 = tail call spir_func signext i8 @__spirv_GroupNonUniformBitwiseXor(i32 3, i32 0, i8 signext 0)
%6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
store i8 %5, i8 addrspace(1)* %6, align 1
%7 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_andc(i8 signext 0)
+ %r7 = tail call spir_func signext i8 @__spirv_GroupNonUniformBitwiseAnd(i32 3, i32 1, i8 signext 0)
%8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3
store i8 %7, i8 addrspace(1)* %8, align 1
%9 = tail call spir_func signext i8 @_Z39sub_group_non_uniform_scan_inclusive_orc(i8 signext 0)
+ %r9 = tail call spir_func signext i8 @__spirv_GroupNonUniformBitwiseOr(i32 3, i32 1, i8 signext 0)
%10 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 4
store i8 %9, i8 addrspace(1)* %10, align 1
%11 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_xorc(i8 signext 0)
+ %r11 = tail call spir_func signext i8 @__spirv_GroupNonUniformBitwiseXor(i32 3, i32 1, i8 signext 0)
%12 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 5
store i8 %11, i8 addrspace(1)* %12, align 1
%13 = tail call spir_func signext i8 @_Z40sub_group_non_uniform_scan_exclusive_andc(i8 signext 0)
@@ -1239,10 +1273,13 @@ define dso_local spir_kernel void @testNonUniformBitwiseChar(i8 addrspace(1)* no
}
declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_andc(i8 signext) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformBitwiseAnd(i32, i32, i8 signext)
declare dso_local spir_func signext i8 @_Z31sub_group_non_uniform_reduce_orc(i8 signext) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformBitwiseOr(i32, i32, i8 signext)
declare dso_local spir_func signext i8 @_Z32sub_group_non_uniform_reduce_xorc(i8 signext) local_unnamed_addr
+declare dso_local spir_func signext i8 @__spirv_GroupNonUniformBitwiseXor(i32, i32, i8 signext)
declare dso_local spir_func signext i8 @_Z40sub_group_non_uniform_scan_inclusive_andc(i8 signext) local_unnamed_addr
@@ -1677,9 +1714,9 @@ declare dso_local spir_func i64 @_Z39sub_group_non_uniform_scan_exclusive_orm(i6
declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_xorm(i64) local_unnamed_addr
; CHECK-SPIRV: OpFunction
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalOr %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]]
-; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformLogicalOr %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]]
+; CHECK-SPIRV-COUNT-2: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] Reduce %[[#false]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] InclusiveScan %[[#false]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalOr %[[#bool]] %[[#ScopeSubgroup]] InclusiveScan %[[#false]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] InclusiveScan %[[#false]]
@@ -1690,11 +1727,14 @@ declare dso_local spir_func i64 @_Z40sub_group_non_uniform_scan_exclusive_xorm(i
define dso_local spir_kernel void @testNonUniformLogical(i32 addrspace(1)* nocapture) local_unnamed_addr {
%2 = tail call spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_andi(i32 0)
+ %r2 = tail call spir_func i1 @__spirv_GroupNonUniformLogicalAnd(i32 3, i32 0, i1 false)
store i32 %2, i32 addrspace(1)* %0, align 4
%3 = tail call spir_func i32 @_Z39sub_group_non_uniform_reduce_logical_ori(i32 0)
+ %r3 = tail call spir_func i1 @__spirv_GroupNonUniformLogicalOr(i32 3, i32 0, i1 false)
%4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
store i32 %3, i32 addrspace(1)* %4, align 4
%5 = tail call spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_xori(i32 0)
+ %r5 = tail call spir_func i1 @__spirv_GroupNonUniformLogicalXor(i32 3, i32 0, i1 false)
%6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
store i32 %5, i32 addrspace(1)* %6, align 4
%7 = tail call spir_func i32 @_Z48sub_group_non_uniform_scan_inclusive_logical_andi(i32 0)
@@ -1719,10 +1759,13 @@ define dso_local spir_kernel void @testNonUniformLogical(i32 addrspace(1)* nocap
}
declare dso_local spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_andi(i32) local_unnamed_addr
+declare dso_local spir_func i1 @__spirv_GroupNonUniformLogicalAnd(i32, i32, i1)
declare dso_local spir_func i32 @_Z39sub_group_non_uniform_reduce_logical_ori(i32) local_unnamed_addr
+declare dso_local spir_func i1 @__spirv_GroupNonUniformLogicalOr(i32, i32, i1)
declare dso_local spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_xori(i32) local_unnamed_addr
+declare dso_local spir_func i1 @__spirv_GroupNonUniformLogicalXor(i32, i32, i1)
declare dso_local spir_func i32 @_Z48sub_group_non_uniform_scan_inclusive_logical_andi(i32) local_unnamed_addr
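
In these tests the second i32 argument is emitted verbatim as the group-operation immediate: 0 selects Reduce and 1 InclusiveScan; 2 would select ExclusiveScan under the same SPIR-V encoding. A sketch (kernel name hypothetical):

define spir_kernel void @xor_scan(ptr addrspace(1) %out) {
  ; group operation 2 = ExclusiveScan
  %r = call spir_func i8 @__spirv_GroupNonUniformBitwiseXor(i32 3, i32 2, i8 1)
  store i8 %r, ptr addrspace(1) %out, align 1
  ret void
}

declare spir_func i8 @__spirv_GroupNonUniformBitwiseXor(i32, i32, i8)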
From 82fa5fe2baa5a45e63af619d52f193a8d5105af3 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 3 Jul 2024 09:00:15 -0700
Subject: [PATCH 5/7] add Atomic, Convert instructions and update test cases
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 11 +++++
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 18 +++++++
.../test/CodeGen/SPIRV/instructions/atomic.ll | 22 +++++++++
.../SPIRV/instructions/integer-casts.ll | 48 +++++++++++++++++++
4 files changed, 99 insertions(+)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 3a907366538ef..038dc4c09b452 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1469,6 +1469,9 @@ static bool generateAtomicInst(const SPIRV::IncomingCall *Call,
case SPIRV::OpAtomicFlagClear:
return buildAtomicFlagInst(Call, Opcode, MIRBuilder, GR);
default:
+ if (Call->isSpirvOp())
+ return buildOpFromWrapper(MIRBuilder, Opcode, Call,
+ GR->getSPIRVTypeID(Call->ReturnType));
return false;
}
}
@@ -2234,6 +2237,14 @@ static bool generateConvertInst(const StringRef DemangledCall,
const SPIRV::ConvertBuiltin *Builtin =
SPIRV::lookupConvertBuiltin(Call->Builtin->Name, Call->Builtin->Set);
+ if (!Builtin && Call->isSpirvOp()) {
+ const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
+ unsigned Opcode =
+ SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode;
+ return buildOpFromWrapper(MIRBuilder, Opcode, Call,
+ GR->getSPIRVTypeID(Call->ReturnType));
+ }
+
if (Builtin->IsSaturated)
buildOpDecorate(Call->ReturnRegister, MIRBuilder,
SPIRV::Decoration::SaturatedConversion, {});
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index f8ede6ab244e1..5e2ab146fac2e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -617,6 +617,10 @@ defm : DemangledNativeBuiltin<"atomic_flag_test_and_set_explicit", OpenCL_std, A
defm : DemangledNativeBuiltin<"atomic_flag_clear", OpenCL_std, Atomic, 1, 1, OpAtomicFlagClear>;
defm : DemangledNativeBuiltin<"__spirv_AtomicFlagClear", OpenCL_std, Atomic, 3, 3, OpAtomicFlagClear>;
defm : DemangledNativeBuiltin<"atomic_flag_clear_explicit", OpenCL_std, Atomic, 2, 3, OpAtomicFlagClear>;
+defm : DemangledNativeBuiltin<"__spirv_AtomicSMin", OpenCL_std, Atomic, 4, 4, OpAtomicSMin>;
+defm : DemangledNativeBuiltin<"__spirv_AtomicSMax", OpenCL_std, Atomic, 4, 4, OpAtomicSMax>;
+defm : DemangledNativeBuiltin<"__spirv_AtomicUMin", OpenCL_std, Atomic, 4, 4, OpAtomicUMin>;
+defm : DemangledNativeBuiltin<"__spirv_AtomicUMax", OpenCL_std, Atomic, 4, 4, OpAtomicUMax>;
// Barrier builtin records:
defm : DemangledNativeBuiltin<"barrier", OpenCL_std, Barrier, 1, 3, OpControlBarrier>;
@@ -1410,6 +1414,20 @@ defm : DemangledConvertBuiltin<"convert_long", OpenCL_std>;
defm : DemangledConvertBuiltin<"convert_ulong", OpenCL_std>;
defm : DemangledConvertBuiltin<"convert_float", OpenCL_std>;
+defm : DemangledNativeBuiltin<"__spirv_ConvertFToU", OpenCL_std, Convert, 1, 1, OpConvertFToU>;
+defm : DemangledNativeBuiltin<"__spirv_ConvertFToS", OpenCL_std, Convert, 1, 1, OpConvertFToS>;
+defm : DemangledNativeBuiltin<"__spirv_ConvertSToF", OpenCL_std, Convert, 1, 1, OpConvertSToF>;
+defm : DemangledNativeBuiltin<"__spirv_ConvertUToF", OpenCL_std, Convert, 1, 1, OpConvertUToF>;
+defm : DemangledNativeBuiltin<"__spirv_UConvert", OpenCL_std, Convert, 1, 1, OpUConvert>;
+defm : DemangledNativeBuiltin<"__spirv_SConvert", OpenCL_std, Convert, 1, 1, OpSConvert>;
+defm : DemangledNativeBuiltin<"__spirv_FConvert", OpenCL_std, Convert, 1, 1, OpFConvert>;
+defm : DemangledNativeBuiltin<"__spirv_QuantizeToF16", OpenCL_std, Convert, 1, 1, OpQuantizeToF16>;
+defm : DemangledNativeBuiltin<"__spirv_ConvertPtrToU", OpenCL_std, Convert, 1, 1, OpConvertPtrToU>;
+defm : DemangledNativeBuiltin<"__spirv_SatConvertSToU", OpenCL_std, Convert, 1, 1, OpSatConvertSToU>;
+defm : DemangledNativeBuiltin<"__spirv_SatConvertUToS", OpenCL_std, Convert, 1, 1, OpSatConvertUToS>;
+defm : DemangledNativeBuiltin<"__spirv_ConvertUToPtr", OpenCL_std, Convert, 1, 1, OpConvertUToPtr>;
+
+
// cl_intel_bfloat16_conversions / SPV_INTEL_bfloat16_conversion
// Multiclass used to define at the same time both a demangled builtin records
// and a corresponding convert builtin records.
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
index ce59bb2064027..9fd9de5295630 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
@@ -12,6 +12,7 @@
; CHECK-DAG: OpName [[XOR:%.*]] "test_xor"
; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[I64Ty:%.*]] = OpTypeInt 64 0
;; Device scope is encoded with constant 1
; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1
;; "monotonic" maps to the relaxed memory semantics, encoded with constant 0
@@ -133,3 +134,24 @@ define i32 @test_xor(i32* %ptr, i32 %val) {
%r = atomicrmw xor i32* %ptr, i32 %val monotonic
ret i32 %r
}
+
+; CHECK: OpFunction
+; CHECK: [[Arg1:%.*]] = OpFunctionParameter
+; CHECK: [[Arg2:%.*]] = OpFunctionParameter
+; CHECK: OpAtomicSMin [[I64Ty]] %[[#]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK: OpAtomicSMax [[I64Ty]] %[[#]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK: OpAtomicUMin [[I64Ty]] %[[#]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK: OpAtomicUMax [[I64Ty]] %[[#]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK: OpFunctionEnd
+define dso_local spir_kernel void @test_wrappers(ptr addrspace(4) %arg, i64 %val) {
+ %r1 = call spir_func i64 @__spirv_AtomicSMin(ptr addrspace(4) %arg, i32 1, i32 0, i64 %val)
+ %r2 = call spir_func i64 @__spirv_AtomicSMax(ptr addrspace(4) %arg, i32 1, i32 0, i64 %val)
+ %r3 = call spir_func i64 @__spirv_AtomicUMin(ptr addrspace(4) %arg, i32 1, i32 0, i64 %val)
+ %r4 = call spir_func i64 @__spirv_AtomicUMax(ptr addrspace(4) %arg, i32 1, i32 0, i64 %val)
+ ret void
+}
+
+declare dso_local spir_func i64 @__spirv_AtomicSMin(ptr addrspace(4), i32, i32, i64)
+declare dso_local spir_func i64 @__spirv_AtomicSMax(ptr addrspace(4), i32, i32, i64)
+declare dso_local spir_func i64 @__spirv_AtomicUMin(ptr addrspace(4), i32, i32, i64)
+declare dso_local spir_func i64 @__spirv_AtomicUMax(ptr addrspace(4), i32, i32, i64)
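The wrapper arguments mirror the OpAtomic* operand order: pointer, scope,
semantics, value. The test above encodes Device scope (1) and relaxed
semantics (0); other values from the SPIR-V Scope and Memory Semantics enums
should pass through unchanged. A sketch, assuming Workgroup scope (2) with
AcquireRelease semantics (0x8):

; Sketch only, not part of this patch's tests.
define spir_kernel void @test_wg_atomic(ptr addrspace(4) %arg, i64 %val) {
  %r = call spir_func i64 @__spirv_AtomicSMax(ptr addrspace(4) %arg, i32 2, i32 8, i64 %val)
  ret void
}
declare spir_func i64 @__spirv_AtomicSMax(ptr addrspace(4), i32, i32, i64)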
diff --git a/llvm/test/CodeGen/SPIRV/instructions/integer-casts.ll b/llvm/test/CodeGen/SPIRV/instructions/integer-casts.ll
index a84ef3f70c575..fda7567acc61d 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/integer-casts.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/integer-casts.ll
@@ -20,6 +20,9 @@
; CHECK-DAG: OpName [[ZEXT8_16v4:%.*]] "u8tou16v4"
; CHECK-DAG: OpName [[ZEXT16_32v4:%.*]] "u16tou32v4"
+; CHECK-DAG: [[F32:%.*]] = OpTypeFloat 32
+; CHECK-DAG: [[F16:%.*]] = OpTypeFloat 16
+; CHECK-DAG: [[U64:%.*]] = OpTypeInt 64 0
; CHECK-DAG: [[U32:%.*]] = OpTypeInt 32 0
; CHECK-DAG: [[U16:%.*]] = OpTypeInt 16 0
; CHECK-DAG: [[U8:%.*]] = OpTypeInt 8 0
@@ -227,3 +230,48 @@ define <4 x i32> @u16tou32v4(<4 x i16> %a) {
%r = zext <4 x i16> %a to <4 x i32>
ret <4 x i32> %r
}
+
+; CHECK: OpFunction
+; CHECK: [[Arg1:%.*]] = OpFunctionParameter
+; CHECK: [[Arg2:%.*]] = OpFunctionParameter
+; CHECK: %[[#]] = OpConvertFToU [[U32]] %[[#]]
+; CHECK: %[[#]] = OpConvertFToS [[U32]] %[[#]]
+; CHECK: %[[#]] = OpConvertSToF [[F32]] %[[#]]
+; CHECK: %[[#]] = OpConvertUToF [[F32]] %[[#]]
+; CHECK: %[[#]] = OpUConvert [[U32]] %[[#]]
+; CHECK: %[[#]] = OpSConvert [[U32]] %[[#]]
+; CHECK: %[[#]] = OpFConvert [[F16]] %[[#]]
+; CHECK: %[[#]] = OpQuantizeToF16 [[F32]] %[[#]]
+; CHECK: %[[#]] = OpSatConvertSToU [[U64]] %[[#]]
+; CHECK: %[[#]] = OpSatConvertUToS [[U64]] %[[#]]
+; CHECK: %[[#]] = OpConvertPtrToU [[U64]] [[Arg1]]
+; CHECK: %[[#]] = OpConvertUToPtr %[[#]] [[Arg2]]
+; CHECK: OpFunctionEnd
+define dso_local spir_kernel void @test_wrappers(ptr addrspace(4) %arg, i64 %arg_ptr) {
+ %r1 = call spir_func i32 @__spirv_ConvertFToU(float 0.000000e+00)
+ %r2 = call spir_func i32 @__spirv_ConvertFToS(float 0.000000e+00)
+ %r3 = call spir_func float @__spirv_ConvertSToF(i32 1)
+ %r4 = call spir_func float @__spirv_ConvertUToF(i32 1)
+ %r5 = call spir_func i32 @__spirv_UConvert(i64 1)
+ %r6 = call spir_func i32 @__spirv_SConvert(i64 1)
+ %r7 = call spir_func half @__spirv_FConvert(float 0.000000e+00)
+ %r8 = call spir_func float @__spirv_QuantizeToF16(float 0.000000e+00)
+ %r9 = call spir_func i64 @__spirv_SatConvertSToU(i64 1)
+ %r10 = call spir_func i64 @__spirv_SatConvertUToS(i64 1)
+ %r11 = call spir_func i64 @__spirv_ConvertPtrToU(ptr addrspace(4) %arg)
+ %r12 = call spir_func ptr addrspace(4) @__spirv_ConvertUToPtr(i64 %arg_ptr)
+ ret void
+}
+
+declare dso_local spir_func i32 @__spirv_ConvertFToU(float)
+declare dso_local spir_func i32 @__spirv_ConvertFToS(float)
+declare dso_local spir_func float @__spirv_ConvertSToF(i32)
+declare dso_local spir_func float @__spirv_ConvertUToF(i32)
+declare dso_local spir_func i32 @__spirv_UConvert(i64)
+declare dso_local spir_func i32 @__spirv_SConvert(i64)
+declare dso_local spir_func half @__spirv_FConvert(float)
+declare dso_local spir_func float @__spirv_QuantizeToF16(float)
+declare dso_local spir_func i64 @__spirv_SatConvertSToU(i64)
+declare dso_local spir_func i64 @__spirv_SatConvertUToS(i64)
+declare dso_local spir_func i64 @__spirv_ConvertPtrToU(ptr addrspace(4))
+declare dso_local spir_func ptr addrspace(4) @__spirv_ConvertUToPtr(i64)
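Taken together, __spirv_ConvertPtrToU and __spirv_ConvertUToPtr permit a
pointer/integer round trip entirely through the wrappers. A condensed sketch
of the pattern exercised above (i64 matches the spirv64 pointer width):

define spir_kernel void @ptr_roundtrip(ptr addrspace(4) %p) {
  ; OpConvertPtrToU, then OpConvertUToPtr back to the generic address space.
  %u = call spir_func i64 @__spirv_ConvertPtrToU(ptr addrspace(4) %p)
  %q = call spir_func ptr addrspace(4) @__spirv_ConvertUToPtr(i64 %u)
  ret void
}
declare spir_func i64 @__spirv_ConvertPtrToU(ptr addrspace(4))
declare spir_func ptr addrspace(4) @__spirv_ConvertUToPtr(i64)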
>From 21d5970b05f9c41434377f17d6a84dd8709ca46f Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 3 Jul 2024 09:16:26 -0700
Subject: [PATCH 6/7] op Dot and update tests
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 3 +++
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 1 +
llvm/test/CodeGen/SPIRV/instructions/atomic.ll | 3 +++
.../CodeGen/SPIRV/instructions/integer-casts.ll | 4 ++++
llvm/test/CodeGen/SPIRV/transcoding/OpDot.ll | 17 +++++++++++++++--
5 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 038dc4c09b452..286bdb9a7ebac 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1515,6 +1515,9 @@ static bool generateCastToPtrInst(const SPIRV::IncomingCall *Call,
static bool generateDotOrFMulInst(const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
+ if (Call->isSpirvOp())
+ return buildOpFromWrapper(MIRBuilder, SPIRV::OpDot, Call,
+ GR->getSPIRVTypeID(Call->ReturnType));
unsigned Opcode = GR->getSPIRVTypeForVReg(Call->Arguments[0])->getOpcode();
bool IsVec = Opcode == SPIRV::OpTypeVector;
// Use OpDot only in case of vector args and OpFMul in case of scalar args.
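To make the dispatch concrete: the early return added above sends any
__spirv_Dot call straight to OpDot, while the demangled OpenCL "dot" builtin
still chooses between OpDot and OpFMul based on the argument type. A sketch
combining both paths, using the declarations from the OpDot.ll test below:

define spir_kernel void @dot_paths(float %f, <2 x float> %v) {
  ; Scalar operands of the OpenCL builtin lower to OpFMul ...
  %s = call spir_func float @_Z3dotff(float %f, float %f)
  ; ... while the __spirv_Dot wrapper always lowers to OpDot.
  %d = call spir_func float @__spirv_Dot(<2 x float> %v, <2 x float> %v)
  ret void
}
declare spir_func float @_Z3dotff(float, float)
declare spir_func float @__spirv_Dot(<2 x float>, <2 x float>)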
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 5e2ab146fac2e..989ea261402f9 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -99,6 +99,7 @@ def lookupBuiltin : SearchIndex {
// Dot builtin record:
def : DemangledBuiltin<"dot", OpenCL_std, Dot, 2, 2>;
+def : DemangledBuiltin<"__spirv_Dot", OpenCL_std, Dot, 2, 2>;
// Image builtin records:
def : DemangledBuiltin<"read_imagei", OpenCL_std, ReadImage, 2, 4>;
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
index 9fd9de5295630..8a19fc78238c6 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
@@ -1,3 +1,6 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
diff --git a/llvm/test/CodeGen/SPIRV/instructions/integer-casts.ll b/llvm/test/CodeGen/SPIRV/instructions/integer-casts.ll
index fda7567acc61d..18c39ac939879 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/integer-casts.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/integer-casts.ll
@@ -1,4 +1,8 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; CHECK-DAG: OpName [[TRUNC32_16:%.*]] "i32toi16"
; CHECK-DAG: OpName [[TRUNC32_8:%.*]] "i32toi8"
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpDot.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpDot.ll
index c5042c2b8229f..58fcc3688c89d 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/OpDot.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpDot.ll
@@ -1,4 +1,11 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-SPIRV-DAG: %[[#TyFloat:]] = OpTypeFloat 32
+; CHECK-SPIRV-DAG: %[[#TyHalf:]] = OpTypeFloat 16
;; The OpDot operands must be vectors; check that translating dot with
;; scalar arguments does not result in OpDot.
@@ -16,15 +23,21 @@ entry:
;; The OpDot operands must be vectors; check that translating dot with
;; vector arguments results in OpDot.
; CHECK-SPIRV-LABEL: %[[#]] = OpFunction %[[#]] None %[[#]]
-; CHECK-SPIRV: %[[#]] = OpDot %[[#]] %[[#]] %[[#]]
+; CHECK-SPIRV: %[[#]] = OpDot %[[#TyFloat]] %[[#]] %[[#]]
+; CHECK-SPIRV: %[[#]] = OpDot %[[#TyFloat]] %[[#]] %[[#]]
+; CHECK-SPIRV: %[[#]] = OpDot %[[#TyHalf]] %[[#]] %[[#]]
; CHECK-SPIRV: OpFunctionEnd
-define spir_kernel void @testVector(<2 x float> %f) {
+define spir_kernel void @testVector(<2 x float> %f, <2 x half> %h) {
entry:
%call = tail call spir_func float @_Z3dotDv2_fS_(<2 x float> %f, <2 x float> %f)
+ %call2 = tail call spir_func float @__spirv_Dot(<2 x float> %f, <2 x float> %f)
+ %call3 = tail call spir_func half @_Z11__spirv_DotDv2_DF16_S_(<2 x half> %h, <2 x half> %h)
ret void
}
declare spir_func float @_Z3dotff(float, float)
declare spir_func float @_Z3dotDv2_fS_(<2 x float>, <2 x float>)
+declare spir_func float @__spirv_Dot(<2 x float>, <2 x float>)
+declare spir_func half @_Z11__spirv_DotDv2_DF16_S_(<2 x half>, <2 x half>)
>From c63fd19db64e8e97f406a161b76ef271f69bd43c Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 3 Jul 2024 09:18:33 -0700
Subject: [PATCH 7/7] tweak
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 989ea261402f9..5c057a79afa0c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -1428,7 +1428,6 @@ defm : DemangledNativeBuiltin<"__spirv_SatConvertSToU", OpenCL_std, Convert, 1,
defm : DemangledNativeBuiltin<"__spirv_SatConvertUToS", OpenCL_std, Convert, 1, 1, OpSatConvertUToS>;
defm : DemangledNativeBuiltin<"__spirv_ConvertUToPtr", OpenCL_std, Convert, 1, 1, OpConvertUToPtr>;
-
// cl_intel_bfloat16_conversions / SPV_INTEL_bfloat16_conversion
// Multiclass used to define at the same time both a demangled builtin record
// and a corresponding convert builtin record.