[llvm] Add support for SPIR-V extension: SPV_INTEL_subgroups (PR #81023)
Vyacheslav Levytskyy via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 7 10:41:24 PST 2024
https://github.com/VyacheslavLevytskyy created https://github.com/llvm/llvm-project/pull/81023
The goal of this PR is to implement SPV_INTEL_subgroups extension in SPIR-V Backend.
>From a4559a515b7b4c3faedb8681b75695b0d52296e8 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 7 Feb 2024 10:38:54 -0800
Subject: [PATCH] add initial support for the SPV_INTEL_subgroups extension
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 78 ++++++++
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 58 +++++-
llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 18 ++
llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 23 +++
llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp | 5 +
.../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 6 +-
.../cl_intel_sub_groups.ll | 189 ++++++++++++++++++
7 files changed, 373 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index e4593e7db90e8b..8721b900c8beee 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -13,6 +13,7 @@
#include "SPIRVBuiltins.h"
#include "SPIRV.h"
+#include "SPIRVSubtarget.h"
#include "SPIRVUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -82,6 +83,16 @@ struct GroupBuiltin {
#define GET_GroupBuiltins_DECL
#define GET_GroupBuiltins_IMPL
+struct IntelSubgroupsBuiltin {
+ StringRef Name;
+ uint32_t Opcode;
+ bool IsBlock;
+ bool IsWrite;
+};
+
+#define GET_IntelSubgroupsBuiltins_DECL
+#define GET_IntelSubgroupsBuiltins_IMPL
+
struct GetBuiltin {
StringRef Name;
InstructionSet::InstructionSet Set;
@@ -549,6 +560,7 @@ static bool buildAtomicCompareExchangeInst(const SPIRV::IncomingCall *Call,
assert(GR->getSPIRVTypeForVReg(ObjectPtr)->getOpcode() ==
SPIRV::OpTypePointer);
unsigned ExpectedType = GR->getSPIRVTypeForVReg(ExpectedArg)->getOpcode();
+ (void)ExpectedType;
assert(IsCmpxchg ? ExpectedType == SPIRV::OpTypeInt
: ExpectedType == SPIRV::OpTypePointer);
assert(GR->isScalarOfType(Desired, SPIRV::OpTypeInt));
@@ -849,6 +861,7 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call,
if (GroupBuiltin->HasBoolArg) {
Register ConstRegister = Call->Arguments[0];
auto ArgInstruction = getDefInstrMaybeConstant(ConstRegister, MRI);
+ (void)ArgInstruction;
// TODO: support non-constant bool values.
assert(ArgInstruction->getOpcode() == TargetOpcode::G_CONSTANT &&
"Only constant bool value args are supported");
@@ -900,6 +913,67 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call,
return true;
}
+static bool generateIntelSubgroupsInst(const SPIRV::IncomingCall *Call,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVGlobalRegistry *GR) {
+ const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
+ MachineFunction &MF = MIRBuilder.getMF();
+ const auto *ST = static_cast<const SPIRVSubtarget *>(&MF.getSubtarget());
+ if (!ST->canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) {
+ std::string DiagMsg = std::string(Builtin->Name) +
+ ": the builtin requires the following SPIR-V "
+ "extension: SPV_INTEL_subgroups";
+ report_fatal_error(DiagMsg.c_str(), false);
+ }
+ const SPIRV::IntelSubgroupsBuiltin *IntelSubgroups =
+ SPIRV::lookupIntelSubgroupsBuiltin(Builtin->Name);
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+ uint32_t OpCode = IntelSubgroups->Opcode;
+ if (IntelSubgroups->IsBlock) {
+ // Minimal number or arguments set in TableGen records is 1
+ if (SPIRVType *Arg0Type = GR->getSPIRVTypeForVReg(Call->Arguments[0])) {
+ if (Arg0Type->getOpcode() == SPIRV::OpTypeImage) {
+ // TODO: add required validation from the specification:
+ // "'Image' must be an object whose type is OpTypeImage with a 'Sampled'
+ // operand of 0 or 2. If the 'Sampled' operand is 2, then some
+ // dimensions require a capability."
+ switch (OpCode) {
+ case SPIRV::OpSubgroupBlockReadINTEL:
+ OpCode = SPIRV::OpSubgroupImageBlockReadINTEL;
+ break;
+ case SPIRV::OpSubgroupBlockWriteINTEL:
+ OpCode = SPIRV::OpSubgroupImageBlockWriteINTEL;
+ break;
+ }
+ }
+ }
+ }
+
+ // TODO: opaque pointers types should be eventually resolved in such a way
+ // that validation of block read is enabled with respect to the following
+ // specification requirement:
+ // "'Result Type' may be a scalar or vector type, and its component type must
+ // be equal to the type pointed to by 'Ptr'."
+ // For example, function parameter type should not be default i8 pointer, but
+ // depend on the result type of the instruction where it is used as a pointer
+ // argument of OpSubgroupBlockReadINTEL
+
+ // Build Intel subgroups instruction
+ MachineInstrBuilder MIB =
+ IntelSubgroups->IsWrite
+ ? MIRBuilder.buildInstr(OpCode)
+ : MIRBuilder.buildInstr(OpCode)
+ .addDef(Call->ReturnRegister)
+ .addUse(GR->getSPIRVTypeID(Call->ReturnType));
+ for (size_t i = 0; i < Call->Arguments.size(); ++i) {
+ MIB.addUse(Call->Arguments[i]);
+ MRI->setRegClass(Call->Arguments[i], &SPIRV::IDRegClass);
+ }
+
+ return true;
+}
+
// These queries ask for a single size_t result for a given dimension index, e.g
// size_t get_global_id(uint dimindex). In SPIR-V, the builtins corresonding to
// these values are all vec3 types, so we need to extract the correct index or
@@ -1199,6 +1273,7 @@ static bool generateImageMiscQueryInst(const SPIRV::IncomingCall *Call,
MIRBuilder.getMRI()->setRegClass(Image, &SPIRV::IDRegClass);
SPIRV::Dim::Dim ImageDimensionality = static_cast<SPIRV::Dim::Dim>(
GR->getSPIRVTypeForVReg(Image)->getOperand(2).getImm());
+ (void)ImageDimensionality;
switch (Opcode) {
case SPIRV::OpImageQuerySamples:
@@ -1976,6 +2051,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
return generateVectorLoadStoreInst(Call.get(), MIRBuilder, GR);
case SPIRV::LoadStore:
return generateLoadStoreInst(Call.get(), MIRBuilder, GR);
+ case SPIRV::IntelSubgroups:
+ return generateIntelSubgroupsInst(Call.get(), MIRBuilder, GR);
}
return false;
}
@@ -2119,6 +2196,7 @@ parseBuiltinTypeNameToTargetExtType(std::string TypeName,
for (unsigned i = HasTypeParameter ? 1 : 0; i < Parameters.size(); i++) {
unsigned IntParameter = 0;
bool ValidLiteral = !Parameters[i].getAsInteger(10, IntParameter);
+ (void)ValidLiteral;
assert(ValidLiteral &&
"Invalid format of SPIR-V builtin parameter literal!");
IntParameters.push_back(IntParameter);
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 8acd4691787e4c..4013dd22f4ab57 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -54,6 +54,7 @@ def Enqueue : BuiltinGroup;
def AsyncCopy : BuiltinGroup;
def VectorLoadStore : BuiltinGroup;
def LoadStore : BuiltinGroup;
+def IntelSubgroups : BuiltinGroup;
//===----------------------------------------------------------------------===//
// Class defining a demangled builtin record. The information in the record
@@ -625,7 +626,7 @@ def GroupBuiltins : GenericTable {
"IsBallotFindBit", "IsLogical", "NoGroupOperation", "HasBoolArg"];
}
-// Function to lookup native builtins by their name and set.
+// Function to lookup group builtins by their name and set.
def lookupGroupBuiltin : SearchIndex {
let Table = GroupBuiltins;
let Key = ["Name"];
@@ -871,6 +872,61 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_logical_xors", Wo
defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_logical_xors", WorkOrSub, OpGroupNonUniformLogicalXor>;
defm : DemangledGroupBuiltin<"group_clustered_reduce_logical_xor", WorkOrSub, OpGroupNonUniformLogicalXor>;
+//===----------------------------------------------------------------------===//
+// Class defining a sub group builtin that should be translated into a
+// SPIR-V instruction using the SPV_INTEL_subgroups extension.
+//
+// name is the demangled name of the given builtin.
+// opcode specifies the SPIR-V operation code of the generated instruction.
+//===----------------------------------------------------------------------===//
+class IntelSubgroupsBuiltin<string name, Op operation> {
+ string Name = name;
+ Op Opcode = operation;
+ bit IsBlock = !or(!eq(operation, OpSubgroupBlockReadINTEL),
+ !eq(operation, OpSubgroupBlockWriteINTEL));
+ bit IsWrite = !eq(operation, OpSubgroupBlockWriteINTEL);
+}
+
+// Table gathering all the Intel sub group builtins.
+def IntelSubgroupsBuiltins : GenericTable {
+ let FilterClass = "IntelSubgroupsBuiltin";
+ let Fields = ["Name", "Opcode", "IsBlock", "IsWrite"];
+}
+
+// Function to lookup group builtins by their name and set.
+def lookupIntelSubgroupsBuiltin : SearchIndex {
+ let Table = IntelSubgroupsBuiltins;
+ let Key = ["Name"];
+}
+
+// Multiclass used to define incoming builtin records for the SPV_INTEL_subgroups extension
+// and corresponding work/sub group builtin records.
+multiclass DemangledIntelSubgroupsBuiltin<string name, bits<8> minNumArgs, bits<8> maxNumArgs, Op operation> {
+ def : DemangledBuiltin<!strconcat("intel_sub_group_", name), OpenCL_std, IntelSubgroups, minNumArgs, maxNumArgs>;
+ def : IntelSubgroupsBuiltin<!strconcat("intel_sub_group_", name), operation>;
+}
+
+// cl_intel_subgroups
+defm : DemangledIntelSubgroupsBuiltin<"shuffle", 2, 2, OpSubgroupShuffleINTEL>;
+defm : DemangledIntelSubgroupsBuiltin<"shuffle_down", 3, 3, OpSubgroupShuffleDownINTEL>;
+defm : DemangledIntelSubgroupsBuiltin<"shuffle_up", 3, 3, OpSubgroupShuffleUpINTEL>;
+defm : DemangledIntelSubgroupsBuiltin<"shuffle_xor", 2, 2, OpSubgroupShuffleXorINTEL>;
+foreach i = ["", "2", "4", "8"] in {
+ // cl_intel_subgroups
+ defm : DemangledIntelSubgroupsBuiltin<!strconcat("block_read", i), 1, 2, OpSubgroupBlockReadINTEL>;
+ defm : DemangledIntelSubgroupsBuiltin<!strconcat("block_write", i), 2, 3, OpSubgroupBlockWriteINTEL>;
+ // cl_intel_subgroups_short
+ defm : DemangledIntelSubgroupsBuiltin<!strconcat("block_read_ui", i), 1, 2, OpSubgroupBlockReadINTEL>;
+ defm : DemangledIntelSubgroupsBuiltin<!strconcat("block_write_ui", i), 2, 3, OpSubgroupBlockWriteINTEL>;
+}
+// cl_intel_subgroups_char, cl_intel_subgroups_short, cl_intel_subgroups_long
+foreach i = ["", "2", "4", "8", "16"] in {
+ foreach j = ["c", "s", "l"] in {
+ defm : DemangledIntelSubgroupsBuiltin<!strconcat("block_read_u", j, i), 1, 2, OpSubgroupBlockReadINTEL>;
+ defm : DemangledIntelSubgroupsBuiltin<!strconcat("block_write_u", j, i), 2, 3, OpSubgroupBlockWriteINTEL>;
+ }
+}
+// OpSubgroupImageBlockReadINTEL and OpSubgroupImageBlockWriteINTEL are to be resolved later on (in code)
//===----------------------------------------------------------------------===//
// Class defining a get builtin record used for lowering builtin calls such as
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index da033ba32624cc..caf2ae43480b1c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -761,3 +761,21 @@ def OpGroupNonUniformBitwiseXor: OpGroupNUGroup<"BitwiseXor", 361>;
def OpGroupNonUniformLogicalAnd: OpGroupNUGroup<"LogicalAnd", 362>;
def OpGroupNonUniformLogicalOr: OpGroupNUGroup<"LogicalOr", 363>;
def OpGroupNonUniformLogicalXor: OpGroupNUGroup<"LogicalXor", 364>;
+
+// 3.49.21. Group and Subgroup Instructions
+def OpSubgroupShuffleINTEL: Op<5571, (outs ID:$res), (ins TYPE:$type, ID:$data, ID:$invocationId),
+ "$res = OpSubgroupShuffleINTEL $type $data $invocationId">;
+def OpSubgroupShuffleDownINTEL: Op<5572, (outs ID:$res), (ins TYPE:$type, ID:$current, ID:$next, ID:$delta),
+ "$res = OpSubgroupShuffleDownINTEL $type $current $next $delta">;
+def OpSubgroupShuffleUpINTEL: Op<5573, (outs ID:$res), (ins TYPE:$type, ID:$previous, ID:$current, ID:$delta),
+ "$res = OpSubgroupShuffleUpINTEL $type $previous $current $delta">;
+def OpSubgroupShuffleXorINTEL: Op<5574, (outs ID:$res), (ins TYPE:$type, ID:$data, ID:$value),
+ "$res = OpSubgroupShuffleXorINTEL $type $data $value">;
+def OpSubgroupBlockReadINTEL: Op<5575, (outs ID:$res), (ins TYPE:$type, ID:$ptr),
+ "$res = OpSubgroupBlockReadINTEL $type $ptr">;
+def OpSubgroupBlockWriteINTEL: Op<5576, (outs), (ins ID:$ptr, ID:$data),
+ "OpSubgroupBlockWriteINTEL $ptr $data">;
+def OpSubgroupImageBlockReadINTEL: Op<5577, (outs ID:$res), (ins TYPE:$type, ID:$image, ID:$coordinate),
+ "$res = OpSubgroupImageBlockReadINTEL $type $image $coordinate">;
+def OpSubgroupImageBlockWriteINTEL: Op<5578, (outs), (ins ID:$image, ID:$coordinate, ID:$data),
+ "OpSubgroupImageBlockWriteINTEL $image $coordinate $data">;
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 370da046984f93..2dfb71dad193aa 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -908,6 +908,29 @@ void addInstrRequirements(const MachineInstr &MI,
case SPIRV::OpGroupNonUniformBallotFindMSB:
Reqs.addCapability(SPIRV::Capability::GroupNonUniformBallot);
break;
+ case SPIRV::OpSubgroupShuffleINTEL:
+ case SPIRV::OpSubgroupShuffleDownINTEL:
+ case SPIRV::OpSubgroupShuffleUpINTEL:
+ case SPIRV::OpSubgroupShuffleXorINTEL:
+ if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_subgroups);
+ Reqs.addCapability(SPIRV::Capability::SubgroupShuffleINTEL);
+ }
+ break;
+ case SPIRV::OpSubgroupBlockReadINTEL:
+ case SPIRV::OpSubgroupBlockWriteINTEL:
+ if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_subgroups);
+ Reqs.addCapability(SPIRV::Capability::SubgroupBufferBlockIOINTEL);
+ }
+ break;
+ case SPIRV::OpSubgroupImageBlockReadINTEL:
+ case SPIRV::OpSubgroupImageBlockWriteINTEL:
+ if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_subgroups);
+ Reqs.addCapability(SPIRV::Capability::SubgroupImageBlockIOINTEL);
+ }
+ break;
case SPIRV::OpAssumeTrueKHR:
case SPIRV::OpExpectKHR:
if (ST.canUseExtension(SPIRV::Extension::SPV_KHR_expect_assume)) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
index cf6dfb127cdebf..6eb81f2deb3ab2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
@@ -37,6 +37,11 @@ cl::list<SPIRV::Extension::Extension> Extensions(
clEnumValN(SPIRV::Extension::SPV_INTEL_optnone, "SPV_INTEL_optnone",
"Adds OptNoneINTEL value for Function Control mask that "
"indicates a request to not optimize the function"),
+ clEnumValN(SPIRV::Extension::SPV_INTEL_subgroups, "SPV_INTEL_subgroups",
+ "Allows work items in a subgroup to share data without the "
+ "use of local memory and work group barriers, and to "
+ "utilize specialized hardware to load and store blocks of "
+ "data from images or buffers."),
clEnumValN(SPIRV::Extension::SPV_KHR_no_integer_wrap_decoration,
"SPV_KHR_no_integer_wrap_decoration",
"Adds decorations to indicate that a given instruction does "
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index ac92ee4a0756a5..58ba7781b7777c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -431,9 +431,9 @@ defm InputAttachmentArrayNonUniformIndexingEXT : CapabilityOperand<5310, 0, 0, [
defm UniformTexelBufferArrayNonUniformIndexingEXT : CapabilityOperand<5311, 0, 0, [], [SampledBuffer, ShaderNonUniformEXT]>;
defm StorageTexelBufferArrayNonUniformIndexingEXT : CapabilityOperand<5312, 0, 0, [], [ImageBuffer, ShaderNonUniformEXT]>;
defm RayTracingNV : CapabilityOperand<5340, 0, 0, [], [Shader]>;
-defm SubgroupShuffleINTEL : CapabilityOperand<5568, 0, 0, [], []>;
-defm SubgroupBufferBlockIOINTEL : CapabilityOperand<5569, 0, 0, [], []>;
-defm SubgroupImageBlockIOINTEL : CapabilityOperand<5570, 0, 0, [], []>;
+defm SubgroupShuffleINTEL : CapabilityOperand<5568, 0, 0, [SPV_INTEL_subgroups], []>;
+defm SubgroupBufferBlockIOINTEL : CapabilityOperand<5569, 0, 0, [SPV_INTEL_subgroups], []>;
+defm SubgroupImageBlockIOINTEL : CapabilityOperand<5570, 0, 0, [SPV_INTEL_subgroups], []>;
defm SubgroupImageMediaBlockIOINTEL : CapabilityOperand<5579, 0, 0, [], []>;
defm SubgroupAvcMotionEstimationINTEL : CapabilityOperand<5696, 0, 0, [], []>;
defm SubgroupAvcMotionEstimationIntraINTEL : CapabilityOperand<5697, 0, 0, [], []>;
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
new file mode 100644
index 00000000000000..0e0b2a4dd6ec2c
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
@@ -0,0 +1,189 @@
+; Modified from: https://github.com/KhronosGroup/SPIRV-LLVM-Translator/test/extensions/INTEL/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
+
+;Source:
+;void __kernel test(float2 x, uint c,
+; read_only image2d_t image_in,
+; write_only image2d_t image_out,
+; int2 coord,
+; __global uint* p,
+; __global ushort* sp,
+; __global uchar* cp,
+; __global ulong* lp) {
+; intel_sub_group_shuffle(x, c);
+; intel_sub_group_shuffle_down(x, x, c);
+; intel_sub_group_shuffle_up(x, x, c);
+; intel_sub_group_shuffle_xor(x, c);
+;
+; uint2 ui2 = intel_sub_group_block_read2(image_in, coord);
+; intel_sub_group_block_write2(image_out, coord, ui2);
+; ui2 = intel_sub_group_block_read2(p);
+; intel_sub_group_block_write2(p, ui2);
+;
+; ushort2 us2 = intel_sub_group_block_read_us2(image_in, coord);
+; intel_sub_group_block_write_us2(image_out, coord, us2);
+; us2 = intel_sub_group_block_read_us2(sp);
+; intel_sub_group_block_write_us2(sp, us2);
+;
+; uchar2 uc2 = intel_sub_group_block_read_uc2(image_in, coord);
+; intel_sub_group_block_write_uc2(image_out, coord, uc2);
+; uc2 = intel_sub_group_block_read_uc2(cp);
+; intel_sub_group_block_write_uc2(cp, uc2);
+;
+; ulong2 ul2 = intel_sub_group_block_read_ul2(image_in, coord);
+; intel_sub_group_block_write_ul2(image_out, coord, ul2);
+; ul2 = intel_sub_group_block_read_ul2(lp);
+; intel_sub_group_block_write_ul2(lp, ul2);
+;}
+
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_INTEL_subgroups %s -o - | FileCheck %s
+
+; CHECK-ERROR: LLVM ERROR: intel_sub_group_shuffle: the builtin requires the following SPIR-V extension: SPV_INTEL_subgroups
+
+; CHECK-DAG: Capability SubgroupShuffleINTEL
+; CHECK-DAG: Capability SubgroupBufferBlockIOINTEL
+; CHECK-DAG: Capability SubgroupImageBlockIOINTEL
+; CHECK: Extension "SPV_INTEL_subgroups"
+
+; CHECK-SPIRV-LABEL: Function
+; CHECK-SPIRV-LABEL: Label
+
+; CHECK: SubgroupShuffleINTEL
+; CHECK: SubgroupShuffleDownINTEL
+; CHECK: SubgroupShuffleUpINTEL
+; CHECK: SubgroupShuffleXorINTEL
+
+; CHECK: SubgroupImageBlockReadINTEL
+; CHECK: SubgroupImageBlockWriteINTEL
+; CHECK: SubgroupBlockReadINTEL
+; CHECK: SubgroupBlockWriteINTEL
+
+; CHECK: SubgroupImageBlockReadINTEL
+; CHECK: SubgroupImageBlockWriteINTEL
+; CHECK: SubgroupBlockReadINTEL
+; CHECK: SubgroupBlockWriteINTEL
+
+; CHECK: SubgroupImageBlockReadINTEL
+; CHECK: SubgroupImageBlockWriteINTEL
+; CHECK: SubgroupBlockReadINTEL
+; CHECK: SubgroupBlockWriteINTEL
+
+; CHECK: SubgroupImageBlockReadINTEL
+; CHECK: SubgroupImageBlockWriteINTEL
+; CHECK: SubgroupBlockReadINTEL
+; CHECK: SubgroupBlockWriteINTEL
+
+; CHECK-SPIRV-LABEL: Return
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64"
+
+%opencl.image2d_ro_t = type opaque
+%opencl.image2d_wo_t = type opaque
+
+; Function Attrs: convergent nounwind
+define spir_kernel void @test(<2 x float> %x, i32 %c, ptr addrspace(1) %image_in, ptr addrspace(1) %image_out, <2 x i32> %coord, ptr addrspace(1) %p, ptr addrspace(1) %sp, ptr addrspace(1) %cp, ptr addrspace(1) %lp) local_unnamed_addr #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 !kernel_arg_name !6 {
+entry:
+ %call = tail call spir_func <2 x float> @_Z23intel_sub_group_shuffleDv2_fj(<2 x float> %x, i32 %c) #2
+ %call1 = tail call spir_func <2 x float> @_Z28intel_sub_group_shuffle_downDv2_fS_j(<2 x float> %x, <2 x float> %x, i32 %c) #2
+ %call2 = tail call spir_func <2 x float> @_Z26intel_sub_group_shuffle_upDv2_fS_j(<2 x float> %x, <2 x float> %x, i32 %c) #2
+ %call3 = tail call spir_func <2 x float> @_Z27intel_sub_group_shuffle_xorDv2_fj(<2 x float> %x, i32 %c) #2
+
+ %call4 = tail call spir_func <2 x i32> @_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2
+ tail call spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i32> %call4) #2
+ %call5 = tail call spir_func <2 x i32> @_Z27intel_sub_group_block_read2PU3AS1Kj(ptr addrspace(1) %p) #2
+ tail call spir_func void @_Z28intel_sub_group_block_write2PU3AS1jDv2_j(ptr addrspace(1) %p, <2 x i32> %call5) #2
+
+ %call6 = tail call spir_func <2 x i16> @_Z30intel_sub_group_block_read_us214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2
+ tail call spir_func void @_Z31intel_sub_group_block_write_us214ocl_image2d_woDv2_iDv2_t(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i16> %call6) #2
+ %call7 = tail call spir_func <2 x i16> @_Z30intel_sub_group_block_read_us2PU3AS1Kt(ptr addrspace(1) %sp) #2
+ tail call spir_func void @_Z31intel_sub_group_block_write_us2PU3AS1tDv2_t(ptr addrspace(1) %sp, <2 x i16> %call7) #2
+
+ %call8 = tail call spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2
+ tail call spir_func void @_Z31intel_sub_group_block_write_uc214ocl_image2d_woDv2_iDv2_h(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i8> %call8) #2
+ %call9 = tail call spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc2PU3AS1Kh(ptr addrspace(1) %cp) #2
+ tail call spir_func void @_Z31intel_sub_group_block_write_uc2PU3AS1hDv2_h(ptr addrspace(1) %cp, <2 x i8> %call9) #2
+
+ %call10 = tail call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2
+ tail call spir_func void @_Z31intel_sub_group_block_write_ul214ocl_image2d_woDv2_iDv2_m(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i64> %call10) #2
+ %call11 = tail call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addrspace(1) %lp) #2
+ tail call spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1) %lp, <2 x i64> %call11) #2
+
+ ret void
+}
+
+; Function Attrs: convergent
+declare spir_func <2 x float> @_Z23intel_sub_group_shuffleDv2_fj(<2 x float>, i32) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x float> @_Z28intel_sub_group_shuffle_downDv2_fS_j(<2 x float>, <2 x float>, i32) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x float> @_Z26intel_sub_group_shuffle_upDv2_fS_j(<2 x float>, <2 x float>, i32) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x float> @_Z27intel_sub_group_shuffle_xorDv2_fj(<2 x float>, i32) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x i32> @_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(ptr addrspace(1), <2 x i32>, <2 x i32>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x i32> @_Z27intel_sub_group_block_read2PU3AS1Kj(ptr addrspace(1)) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func void @_Z28intel_sub_group_block_write2PU3AS1jDv2_j(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x i16> @_Z30intel_sub_group_block_read_us214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func void @_Z31intel_sub_group_block_write_us214ocl_image2d_woDv2_iDv2_t(ptr addrspace(1), <2 x i32>, <2 x i16>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x i16> @_Z30intel_sub_group_block_read_us2PU3AS1Kt(ptr addrspace(1)) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func void @_Z31intel_sub_group_block_write_us2PU3AS1tDv2_t(ptr addrspace(1), <2 x i16>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func void @_Z31intel_sub_group_block_write_uc214ocl_image2d_woDv2_iDv2_h(ptr addrspace(1), <2 x i32>, <2 x i8>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc2PU3AS1Kh(ptr addrspace(1)) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func void @_Z31intel_sub_group_block_write_uc2PU3AS1hDv2_h(ptr addrspace(1), <2 x i8>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func void @_Z31intel_sub_group_block_write_ul214ocl_image2d_woDv2_iDv2_m(ptr addrspace(1), <2 x i32>, <2 x i64>) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addrspace(1)) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1), <2 x i64>) local_unnamed_addr #1
+
+attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { convergent nounwind }
+
+!opencl.ocl.version = !{!0}
+!opencl.spir.version = !{!0}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 0, i32 0, i32 1, i32 1, i32 0, i32 1, i32 1, i32 1, i32 1}
+!2 = !{!"none", !"none", !"read_only", !"write_only", !"none", !"none", !"none", !"none", !"none"}
+!3 = !{!"float2", !"uint", !"image2d_t", !"image2d_t", !"int2", !"uint*", !"ushort*", !"uchar*", !"ulong*"}
+!4 = !{!"float __attribute__((ext_vector_type(2)))", !"uint", !"image2d_t", !"image2d_t", !"int __attribute__((ext_vector_type(2)))", !"uint*", !"ushort*", !"uchar*", !"ulong*"}
+!5 = !{!"", !"", !"", !"", !"", !"", !"", !"", !""}
+!6 = !{!"x", !"c", !"image_in", !"image_out", !"coord", !"p", !"sp", !"cp", !"lp"}
More information about the llvm-commits
mailing list