[llvm] b5132b7 - [SPIR-V] Improve type inference: fix types of return values in call lowering (#116609)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 29 11:44:28 PST 2024
Author: Vyacheslav Levytskyy
Date: 2024-11-29T20:44:25+01:00
New Revision: b5132b7d044a5bc83eba9b09bd158cd77a511403
URL: https://github.com/llvm/llvm-project/commit/b5132b7d044a5bc83eba9b09bd158cd77a511403
DIFF: https://github.com/llvm/llvm-project/commit/b5132b7d044a5bc83eba9b09bd158cd77a511403.diff
LOG: [SPIR-V] Improve type inference: fix types of return values in call lowering (#116609)
Goals of the PR are:
* to ensure that correct types are applied to virtual registers which
were used as return values in call lowering. A reproducer is attached as
a new test case, before the PR it fails because spirv-val considers
output invalid due to wrong result/operand types in OpPhi's;
* improve type inference by speeding up postprocessing of types: by
limiting iterations by checking what remains to process, and processing
each instruction just once for any number of operands with incomplete
types;
* improve type inference by more accurate work with incomplete types
(pass the incomplete property to dependent operands, ensure consistency of
the incomplete-types data structure);
* change the processing order and add traversal of PHI nodes when type
inference applies instruction results to specify/update/cast operand
types (fixes an issue with OpPhi's result type mismatch with operand
types).
Added:
llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll
llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll
llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll
llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll
llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll
llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll
Modified:
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
llvm/lib/Target/SPIRV/SPIRVUtils.cpp
llvm/lib/Target/SPIRV/SPIRVUtils.h
llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll
llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
llvm/test/CodeGen/SPIRV/instructions/ret-type.ll
llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll
llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 73dce230575d84..a1684b87722cb2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -469,12 +469,8 @@ static Register buildLoadInst(SPIRVType *BaseType, Register PtrRegister,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR, LLT LowLevelType,
Register DestinationReg = Register(0)) {
- MachineRegisterInfo *MRI = MIRBuilder.getMRI();
- if (!DestinationReg.isValid()) {
- DestinationReg = MRI->createVirtualRegister(&SPIRV::iIDRegClass);
- MRI->setType(DestinationReg, LLT::scalar(64));
- GR->assignSPIRVTypeToVReg(BaseType, DestinationReg, MIRBuilder.getMF());
- }
+ if (!DestinationReg.isValid())
+ DestinationReg = createVirtualRegister(BaseType, GR, MIRBuilder);
// TODO: consider using correct address space and alignment (p0 is canonical
// type for selection though).
MachinePointerInfo PtrInfo = MachinePointerInfo();
@@ -2151,7 +2147,7 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call,
const SPIRVType *PointerSizeTy = GR->getOrCreateSPIRVPointerType(
Int32Ty, MIRBuilder, SPIRV::StorageClass::Function);
for (unsigned I = 0; I < LocalSizeNum; ++I) {
- Register Reg = MRI->createVirtualRegister(&SPIRV::iIDRegClass);
+ Register Reg = MRI->createVirtualRegister(&SPIRV::pIDRegClass);
MRI->setType(Reg, LLType);
GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF());
auto GEPInst = MIRBuilder.buildIntrinsic(
@@ -2539,23 +2535,11 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
SPIRVGlobalRegistry *GR) {
LLVM_DEBUG(dbgs() << "Lowering builtin call: " << DemangledCall << "\n");
- // SPIR-V type and return register.
- Register ReturnRegister = OrigRet;
- SPIRVType *ReturnType = nullptr;
- if (OrigRetTy && !OrigRetTy->isVoidTy()) {
- ReturnType = GR->assignTypeToVReg(OrigRetTy, ReturnRegister, MIRBuilder);
- if (!MIRBuilder.getMRI()->getRegClassOrNull(ReturnRegister))
- MIRBuilder.getMRI()->setRegClass(ReturnRegister,
- GR->getRegClass(ReturnType));
- } else if (OrigRetTy && OrigRetTy->isVoidTy()) {
- ReturnRegister = MIRBuilder.getMRI()->createVirtualRegister(&IDRegClass);
- MIRBuilder.getMRI()->setType(ReturnRegister, LLT::scalar(64));
- ReturnType = GR->assignTypeToVReg(OrigRetTy, ReturnRegister, MIRBuilder);
- }
-
// Lookup the builtin in the TableGen records.
+ SPIRVType *SpvType = GR->getSPIRVTypeForVReg(OrigRet);
+ assert(SpvType && "Inconsistent return register: expected valid type info");
std::unique_ptr<const IncomingCall> Call =
- lookupBuiltin(DemangledCall, Set, ReturnRegister, ReturnType, Args);
+ lookupBuiltin(DemangledCall, Set, OrigRet, SpvType, Args);
if (!Call) {
LLVM_DEBUG(dbgs() << "Builtin record was not found!\n");
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 3c5397319aaf21..3fdaa6aa3257ea 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -539,6 +539,23 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (isFunctionDecl && !DemangledName.empty() &&
(canUseGLSL || canUseOpenCL)) {
+ if (ResVReg.isValid()) {
+ if (!GR->getSPIRVTypeForVReg(ResVReg)) {
+ const Type *RetTy = OrigRetTy;
+ if (auto *PtrRetTy = dyn_cast<PointerType>(OrigRetTy)) {
+ const Value *OrigValue = Info.OrigRet.OrigValue;
+ if (!OrigValue)
+ OrigValue = Info.CB;
+ if (OrigValue)
+ if (Type *ElemTy = GR->findDeducedElementType(OrigValue))
+ RetTy =
+ TypedPointerType::get(ElemTy, PtrRetTy->getAddressSpace());
+ }
+ setRegClassType(ResVReg, RetTy, GR, MIRBuilder);
+ }
+ } else {
+ ResVReg = createVirtualRegister(OrigRetTy, GR, MIRBuilder);
+ }
SmallVector<Register, 8> ArgVRegs;
for (auto Arg : Info.OrigArgs) {
assert(Arg.Regs.size() == 1 && "Call arg has multiple VRegs");
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
index b82c2538a81368..48df845efd76b1 100644
--- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
@@ -69,16 +69,31 @@ void SPIRVGeneralDuplicatesTracker::buildDepsGraph(
MachineOperand *RegOp = &VRegDef->getOperand(0);
if (Reg2Entry.count(RegOp) == 0 &&
(MI->getOpcode() != SPIRV::OpVariable || i != 3)) {
- std::string DiagMsg;
- raw_string_ostream OS(DiagMsg);
- OS << "Unexpected pattern while building a dependency "
- "graph.\nInstruction: ";
- MI->print(OS);
- OS << "Operand: ";
- Op.print(OS);
- OS << "\nOperand definition: ";
- VRegDef->print(OS);
- report_fatal_error(DiagMsg.c_str());
+ // try to repair the unexpected code pattern
+ bool IsFixed = false;
+ if (VRegDef->getOpcode() == TargetOpcode::G_CONSTANT &&
+ RegOp->isReg() && MRI.getType(RegOp->getReg()).isScalar()) {
+ const Constant *C = VRegDef->getOperand(1).getCImm();
+ add(C, MI->getParent()->getParent(), RegOp->getReg());
+ auto Iter = CT.Storage.find(C);
+ if (Iter != CT.Storage.end()) {
+ SPIRV::DTSortableEntry &MissedEntry = Iter->second;
+ Reg2Entry[RegOp] = &MissedEntry;
+ IsFixed = true;
+ }
+ }
+ if (!IsFixed) {
+ std::string DiagMsg;
+ raw_string_ostream OS(DiagMsg);
+ OS << "Unexpected pattern while building a dependency "
+ "graph.\nInstruction: ";
+ MI->print(OS);
+ OS << "Operand: ";
+ Op.print(OS);
+ OS << "\nOperand definition: ";
+ VRegDef->print(OS);
+ report_fatal_error(DiagMsg.c_str());
+ }
}
if (Reg2Entry.count(RegOp))
E->addDep(Reg2Entry[RegOp]);
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index e6ef40e010dc20..e6f136cc81b4b4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -17,6 +17,7 @@
#include "SPIRVSubtarget.h"
#include "SPIRVTargetMachine.h"
#include "SPIRVUtils.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
@@ -67,7 +68,7 @@ class SPIRVEmitIntrinsics
public InstVisitor<SPIRVEmitIntrinsics, Instruction *> {
SPIRVTargetMachine *TM = nullptr;
SPIRVGlobalRegistry *GR = nullptr;
- Function *F = nullptr;
+ Function *CurrF = nullptr;
bool TrackConstants = true;
bool HaveFunPtrs = false;
DenseMap<Instruction *, Constant *> AggrConsts;
@@ -76,8 +77,33 @@ class SPIRVEmitIntrinsics
SPIRV::InstructionSet::InstructionSet InstrSet;
// a register of Instructions that don't have a complete type definition
- DenseMap<Value *, unsigned> UncompleteTypeInfo;
- SmallVector<Value *> PostprocessWorklist;
+ bool CanTodoType = true;
+ unsigned TodoTypeSz = 0;
+ DenseMap<Value *, bool> TodoType;
+ void insertTodoType(Value *Op) {
+ // TODO: add isa<CallInst>(Op) to no-insert
+ if (CanTodoType && !isa<GetElementPtrInst>(Op)) {
+ auto It = TodoType.try_emplace(Op, true);
+ if (It.second)
+ ++TodoTypeSz;
+ }
+ }
+ void eraseTodoType(Value *Op) {
+ auto It = TodoType.find(Op);
+ if (It != TodoType.end() && It->second) {
+ TodoType[Op] = false;
+ --TodoTypeSz;
+ }
+ }
+ bool isTodoType(Value *Op) {
+ if (isa<GetElementPtrInst>(Op))
+ return false;
+ auto It = TodoType.find(Op);
+ return It != TodoType.end() && It->second;
+ }
+ // a register of Instructions that were visited by deduceOperandElementType()
+ // to validate operand types with an instruction
+ std::unordered_set<Instruction *> TypeValidated;
// well known result types of builtins
enum WellKnownTypes { Event };
@@ -86,7 +112,8 @@ class SPIRVEmitIntrinsics
Type *deduceElementType(Value *I, bool UnknownElemTypeI8);
Type *deduceElementTypeHelper(Value *I, bool UnknownElemTypeI8);
Type *deduceElementTypeHelper(Value *I, std::unordered_set<Value *> &Visited,
- bool UnknownElemTypeI8);
+ bool UnknownElemTypeI8,
+ bool IgnoreKnownType = false);
Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
bool UnknownElemTypeI8);
Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
@@ -105,8 +132,10 @@ class SPIRVEmitIntrinsics
bool UnknownElemTypeI8);
// deduce Types of operands of the Instruction if possible
- void deduceOperandElementType(Instruction *I, Instruction *AskOp = 0,
- Type *AskTy = 0, CallInst *AssignCI = 0);
+ void deduceOperandElementType(Instruction *I,
+ SmallPtrSet<Instruction *, 4> *UncompleteRets,
+ const SmallPtrSet<Value *, 4> *AskOps = nullptr,
+ bool IsPostprocessing = false);
void preprocessCompositeConstants(IRBuilder<> &B);
void preprocessUndefs(IRBuilder<> &B);
@@ -122,6 +151,9 @@ class SPIRVEmitIntrinsics
return B.CreateIntrinsic(IntrID, {Types}, Args);
}
+ Type *reconstructType(Value *Op, bool UnknownElemTypeI8,
+ bool IsPostprocessing);
+
void buildAssignType(IRBuilder<> &B, Type *ElemTy, Value *Arg);
void buildAssignPtr(IRBuilder<> &B, Type *ElemTy, Value *Arg);
void updateAssignType(CallInst *AssignCI, Value *Arg, Value *OfType);
@@ -145,12 +177,31 @@ class SPIRVEmitIntrinsics
Type *deduceFunParamElementType(Function *F, unsigned OpIdx);
Type *deduceFunParamElementType(Function *F, unsigned OpIdx,
std::unordered_set<Function *> &FVisited);
- void replaceWithPtrcasted(Instruction *CI, Type *NewElemTy, Type *KnownElemTy,
- CallInst *AssignCI);
+
+ bool deduceOperandElementTypeCalledFunction(
+ CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+ Type *&KnownElemTy);
+ void deduceOperandElementTypeFunctionPointer(
+ CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+ Type *&KnownElemTy, bool IsPostprocessing);
+
+ CallInst *buildSpvPtrcast(Function *F, Value *Op, Type *ElemTy);
+ void replaceUsesOfWithSpvPtrcast(Value *Op, Type *ElemTy, Instruction *I,
+ DenseMap<Function *, CallInst *> Ptrcasts);
+ void propagateElemType(Value *Op, Type *ElemTy,
+ DenseSet<std::pair<Value *, Value *>> &VisitedSubst);
+ void
+ propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy,
+ DenseSet<std::pair<Value *, Value *>> &VisitedSubst);
+ void propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy,
+ DenseSet<std::pair<Value *, Value *>> &VisitedSubst,
+ std::unordered_set<Value *> &Visited,
+ DenseMap<Function *, CallInst *> Ptrcasts);
+
void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true);
bool runOnFunction(Function &F);
- bool postprocessTypes();
+ bool postprocessTypes(Module &M);
bool processFunctionPointers(Module &M);
public:
@@ -203,10 +254,8 @@ bool expectIgnoredInIRTranslation(const Instruction *I) {
}
bool allowEmitFakeUse(const Value *Arg) {
- if (const auto *II = dyn_cast<IntrinsicInst>(Arg))
- if (Function *F = II->getCalledFunction())
- if (F->getName().starts_with("llvm.spv."))
- return false;
+ if (isSpvIntrinsic(Arg))
+ return false;
if (dyn_cast<AtomicCmpXchgInst>(Arg) || dyn_cast<InsertValueInst>(Arg) ||
dyn_cast<UndefValue>(Arg))
return false;
@@ -280,17 +329,10 @@ void SPIRVEmitIntrinsics::replaceAllUsesWith(Value *Src, Value *Dest,
GR->updateIfExistDeducedElementType(Src, Dest, DeleteOld);
GR->updateIfExistAssignPtrTypeInstr(Src, Dest, DeleteOld);
// Update uncomplete type records if any
- auto It = UncompleteTypeInfo.find(Src);
- if (It == UncompleteTypeInfo.end())
- return;
- if (DeleteOld) {
- unsigned Pos = It->second;
- UncompleteTypeInfo.erase(Src);
- UncompleteTypeInfo[Dest] = Pos;
- PostprocessWorklist[Pos] = Dest;
- } else {
- UncompleteTypeInfo[Dest] = PostprocessWorklist.size();
- PostprocessWorklist.push_back(Dest);
+ if (isTodoType(Src)) {
+ if (DeleteOld)
+ eraseTodoType(Src);
+ insertTodoType(Dest);
}
}
@@ -314,8 +356,11 @@ static inline Type *restoreMutatedType(SPIRVGlobalRegistry *GR, Instruction *I,
// Reconstruct type with nested element types according to deduced type info.
// Return nullptr if no detailed type info is available.
-static inline Type *reconstructType(SPIRVGlobalRegistry *GR, Value *Op) {
+Type *SPIRVEmitIntrinsics::reconstructType(Value *Op, bool UnknownElemTypeI8,
+ bool IsPostprocessing) {
Type *Ty = Op->getType();
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ Ty = restoreMutatedType(GR, OpI, Ty);
if (!isUntypedPointerTy(Ty))
return Ty;
// try to find the pointee type
@@ -323,10 +368,17 @@ static inline Type *reconstructType(SPIRVGlobalRegistry *GR, Value *Op) {
return getTypedPointerWrapper(NestedTy, getPointerAddressSpace(Ty));
// not a pointer according to the type info (e.g., Event object)
CallInst *CI = GR->findAssignPtrTypeInstr(Op);
- if (!CI)
- return nullptr;
- MetadataAsValue *MD = cast<MetadataAsValue>(CI->getArgOperand(1));
- return cast<ConstantAsMetadata>(MD->getMetadata())->getType();
+ if (CI) {
+ MetadataAsValue *MD = cast<MetadataAsValue>(CI->getArgOperand(1));
+ return cast<ConstantAsMetadata>(MD->getMetadata())->getType();
+ }
+ if (UnknownElemTypeI8) {
+ if (!IsPostprocessing)
+ insertTodoType(Op);
+ return getTypedPointerWrapper(IntegerType::getInt8Ty(Op->getContext()),
+ getPointerAddressSpace(Ty));
+ }
+ return nullptr;
}
void SPIRVEmitIntrinsics::buildAssignType(IRBuilder<> &B, Type *Ty,
@@ -354,7 +406,7 @@ void SPIRVEmitIntrinsics::buildAssignPtr(IRBuilder<> &B, Type *ElemTy,
Value *OfType = PoisonValue::get(ElemTy);
CallInst *AssignPtrTyCI = GR->findAssignPtrTypeInstr(Arg);
if (AssignPtrTyCI == nullptr ||
- AssignPtrTyCI->getParent()->getParent() != F) {
+ AssignPtrTyCI->getParent()->getParent() != CurrF) {
AssignPtrTyCI = buildIntrWithMD(
Intrinsic::spv_assign_ptr_type, {Arg->getType()}, OfType, Arg,
{B.getInt32(getPointerAddressSpace(Arg->getType()))}, B);
@@ -379,8 +431,97 @@ void SPIRVEmitIntrinsics::updateAssignType(CallInst *AssignCI, Value *Arg,
GR->addDeducedElementType(Arg, ElemTy);
}
+CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Function *F, Value *Op,
+ Type *ElemTy) {
+ IRBuilder<> B(Op->getContext());
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
+ // spv_ptrcast's argument Op denotes an instruction that generates
+ // a value, and we may use getInsertionPointAfterDef()
+ setInsertPointAfterDef(B, OpI);
+ } else if (auto *OpA = dyn_cast<Argument>(Op)) {
+ B.SetInsertPointPastAllocas(OpA->getParent());
+ B.SetCurrentDebugLocation(DebugLoc());
+ } else {
+ B.SetInsertPoint(F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
+ }
+ Type *OpTy = Op->getType();
+ SmallVector<Type *, 2> Types = {OpTy, OpTy};
+ SmallVector<Value *, 2> Args = {Op, buildMD(PoisonValue::get(ElemTy)),
+ B.getInt32(getPointerAddressSpace(OpTy))};
+ CallInst *PtrCasted =
+ B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
+ buildAssignPtr(B, ElemTy, PtrCasted);
+ return PtrCasted;
+}
+
+void SPIRVEmitIntrinsics::replaceUsesOfWithSpvPtrcast(
+ Value *Op, Type *ElemTy, Instruction *I,
+ DenseMap<Function *, CallInst *> Ptrcasts) {
+ Function *F = I->getParent()->getParent();
+ CallInst *PtrCastedI = nullptr;
+ auto It = Ptrcasts.find(F);
+ if (It == Ptrcasts.end()) {
+ PtrCastedI = buildSpvPtrcast(F, Op, ElemTy);
+ Ptrcasts[F] = PtrCastedI;
+ } else {
+ PtrCastedI = It->second;
+ }
+ I->replaceUsesOfWith(Op, PtrCastedI);
+}
+
+void SPIRVEmitIntrinsics::propagateElemType(
+ Value *Op, Type *ElemTy,
+ DenseSet<std::pair<Value *, Value *>> &VisitedSubst) {
+ DenseMap<Function *, CallInst *> Ptrcasts;
+ SmallVector<User *> Users(Op->users());
+ for (auto *U : Users) {
+ if (!isa<Instruction>(U) || isa<BitCastInst>(U) || isSpvIntrinsic(U))
+ continue;
+ if (!VisitedSubst.insert(std::make_pair(U, Op)).second)
+ continue;
+ Instruction *UI = dyn_cast<Instruction>(U);
+ // If the instruction was validated already, we need to keep it valid by
+ // keeping current Op type.
+ if (isa<GetElementPtrInst>(UI) ||
+ TypeValidated.find(UI) != TypeValidated.end())
+ replaceUsesOfWithSpvPtrcast(Op, ElemTy, UI, Ptrcasts);
+ }
+}
+
+void SPIRVEmitIntrinsics::propagateElemTypeRec(
+ Value *Op, Type *PtrElemTy, Type *CastElemTy,
+ DenseSet<std::pair<Value *, Value *>> &VisitedSubst) {
+ std::unordered_set<Value *> Visited;
+ DenseMap<Function *, CallInst *> Ptrcasts;
+ propagateElemTypeRec(Op, PtrElemTy, CastElemTy, VisitedSubst, Visited,
+ Ptrcasts);
+}
+
+void SPIRVEmitIntrinsics::propagateElemTypeRec(
+ Value *Op, Type *PtrElemTy, Type *CastElemTy,
+ DenseSet<std::pair<Value *, Value *>> &VisitedSubst,
+ std::unordered_set<Value *> &Visited,
+ DenseMap<Function *, CallInst *> Ptrcasts) {
+ if (!Visited.insert(Op).second)
+ return;
+ SmallVector<User *> Users(Op->users());
+ for (auto *U : Users) {
+ if (!isa<Instruction>(U) || isa<BitCastInst>(U) || isSpvIntrinsic(U))
+ continue;
+ if (!VisitedSubst.insert(std::make_pair(U, Op)).second)
+ continue;
+ Instruction *UI = dyn_cast<Instruction>(U);
+ // If the instruction was validated already, we need to keep it valid by
+ // keeping current Op type.
+ if (isa<GetElementPtrInst>(UI) ||
+ TypeValidated.find(UI) != TypeValidated.end())
+ replaceUsesOfWithSpvPtrcast(Op, CastElemTy, UI, Ptrcasts);
+ }
+}
+
// Set element pointer type to the given value of ValueTy and tries to
// specify this type further (recursively) by Operand value, if needed.
+
Type *
SPIRVEmitIntrinsics::deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
bool UnknownElemTypeI8) {
@@ -455,23 +596,22 @@ void SPIRVEmitIntrinsics::maybeAssignPtrType(Type *&Ty, Value *Op, Type *RefTy,
if (isUntypedPointerTy(RefTy)) {
if (!UnknownElemTypeI8)
return;
- if (auto *I = dyn_cast<Instruction>(Op)) {
- UncompleteTypeInfo[I] = PostprocessWorklist.size();
- PostprocessWorklist.push_back(I);
- }
+ insertTodoType(Op);
}
Ty = RefTy;
}
Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
- Value *I, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8) {
+ Value *I, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8,
+ bool IgnoreKnownType) {
// allow to pass nullptr as an argument
if (!I)
return nullptr;
// maybe already known
- if (Type *KnownTy = GR->findDeducedElementType(I))
- return KnownTy;
+ if (!IgnoreKnownType)
+ if (Type *KnownTy = GR->findDeducedElementType(I))
+ return KnownTy;
// maybe a cycle
if (!Visited.insert(I).second)
@@ -483,7 +623,22 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
if (auto *Ref = dyn_cast<AllocaInst>(I)) {
maybeAssignPtrType(Ty, I, Ref->getAllocatedType(), UnknownElemTypeI8);
} else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
- Ty = Ref->getResultElementType();
+ // TODO: not sure if GetElementPtrInst::getTypeAtIndex() does anything
+ // useful here
+ if (isNestedPointer(Ref->getSourceElementType())) {
+ Ty = Ref->getSourceElementType();
+ for (Use &U : drop_begin(Ref->indices()))
+ Ty = GetElementPtrInst::getTypeAtIndex(Ty, U.get());
+ } else {
+ Ty = Ref->getResultElementType();
+ }
+ } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
+ Value *Op = Ref->getPointerOperand();
+ Type *KnownTy = GR->findDeducedElementType(Op);
+ if (!KnownTy)
+ KnownTy = Op->getType();
+ if (Type *ElemTy = getPointeeType(KnownTy))
+ maybeAssignPtrType(Ty, I, ElemTy, UnknownElemTypeI8);
} else if (auto *Ref = dyn_cast<GlobalValue>(I)) {
Ty = deduceElementTypeByValueDeep(
Ref->getValueType(),
@@ -559,7 +714,7 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
}
// remember the found relationship
- if (Ty) {
+ if (Ty && !IgnoreKnownType) {
// specify nested types if needed, otherwise return unchanged
GR->addDeducedElementType(I, Ty);
}
@@ -601,7 +756,7 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
if (Type *NestedTy =
deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
- Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
+ Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
} else {
Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
UnknownElemTypeI8);
@@ -622,7 +777,7 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
if (Type *NestedTy =
deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
- Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
+ Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
} else {
Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
UnknownElemTypeI8);
@@ -661,10 +816,7 @@ Type *SPIRVEmitIntrinsics::deduceElementType(Value *I, bool UnknownElemTypeI8) {
return Ty;
if (!UnknownElemTypeI8)
return nullptr;
- if (auto *Instr = dyn_cast<Instruction>(I)) {
- UncompleteTypeInfo[Instr] = PostprocessWorklist.size();
- PostprocessWorklist.push_back(Instr);
- }
+ insertTodoType(I);
return IntegerType::getInt8Ty(I->getContext());
}
@@ -683,10 +835,9 @@ static inline Type *getAtomicElemTy(SPIRVGlobalRegistry *GR, Instruction *I,
// Try to deduce element type for a call base. Returns false if this is an
// indirect function invocation, and true otherwise.
-static bool deduceOperandElementTypeCalledFunction(
- SPIRVGlobalRegistry *GR, Instruction *I,
- SPIRV::InstructionSet::InstructionSet InstrSet, CallInst *CI,
- SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy) {
+bool SPIRVEmitIntrinsics::deduceOperandElementTypeCalledFunction(
+ CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+ Type *&KnownElemTy) {
Function *CalledF = CI->getCalledFunction();
if (!CalledF)
return false;
@@ -726,7 +877,7 @@ static bool deduceOperandElementTypeCalledFunction(
case SPIRV::OpAtomicUMax:
case SPIRV::OpAtomicSMin:
case SPIRV::OpAtomicSMax: {
- KnownElemTy = getAtomicElemTy(GR, I, Op);
+ KnownElemTy = getAtomicElemTy(GR, CI, Op);
if (!KnownElemTy)
return true;
Ops.push_back(std::make_pair(Op, 0));
@@ -738,32 +889,44 @@ static bool deduceOperandElementTypeCalledFunction(
}
// Try to deduce element type for a function pointer.
-static void deduceOperandElementTypeFunctionPointer(
- SPIRVGlobalRegistry *GR, Instruction *I, CallInst *CI,
- SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy) {
+void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
+ CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+ Type *&KnownElemTy, bool IsPostprocessing) {
Value *Op = CI->getCalledOperand();
if (!Op || !isPointerTy(Op->getType()))
return;
Ops.push_back(std::make_pair(Op, std::numeric_limits<unsigned>::max()));
FunctionType *FTy = CI->getFunctionType();
- bool IsNewFTy = false;
+ bool IsNewFTy = false, IsUncomplete = false;
SmallVector<Type *, 4> ArgTys;
for (Value *Arg : CI->args()) {
Type *ArgTy = Arg->getType();
- if (ArgTy->isPointerTy())
+ if (ArgTy->isPointerTy()) {
if (Type *ElemTy = GR->findDeducedElementType(Arg)) {
IsNewFTy = true;
- ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy));
+ ArgTy = getTypedPointerWrapper(ElemTy, getPointerAddressSpace(ArgTy));
+ if (isTodoType(Arg))
+ IsUncomplete = true;
+ } else {
+ IsUncomplete = true;
}
+ }
ArgTys.push_back(ArgTy);
}
Type *RetTy = FTy->getReturnType();
- if (I->getType()->isPointerTy())
- if (Type *ElemTy = GR->findDeducedElementType(I)) {
+ if (CI->getType()->isPointerTy()) {
+ if (Type *ElemTy = GR->findDeducedElementType(CI)) {
IsNewFTy = true;
RetTy =
- TypedPointerType::get(ElemTy, getPointerAddressSpace(I->getType()));
+ getTypedPointerWrapper(ElemTy, getPointerAddressSpace(CI->getType()));
+ if (isTodoType(CI))
+ IsUncomplete = true;
+ } else {
+ IsUncomplete = true;
}
+ }
+ if (!IsPostprocessing && IsUncomplete)
+ insertTodoType(Op);
KnownElemTy =
IsNewFTy ? FunctionType::get(RetTy, ArgTys, FTy->isVarArg()) : FTy;
}
@@ -772,17 +935,18 @@ static void deduceOperandElementTypeFunctionPointer(
// tries to deduce them. If the Instruction has Pointer operands with known
// types which differ from expected, this function tries to insert a bitcast to
// resolve the issue.
-void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
- Instruction *AskOp,
- Type *AskTy,
- CallInst *AskCI) {
+void SPIRVEmitIntrinsics::deduceOperandElementType(
+ Instruction *I, SmallPtrSet<Instruction *, 4> *UncompleteRets,
+ const SmallPtrSet<Value *, 4> *AskOps, bool IsPostprocessing) {
SmallVector<std::pair<Value *, unsigned>> Ops;
Type *KnownElemTy = nullptr;
+ bool Uncomplete = false;
// look for known basic patterns of type inference
if (auto *Ref = dyn_cast<PHINode>(I)) {
if (!isPointerTy(I->getType()) ||
!(KnownElemTy = GR->findDeducedElementType(I)))
return;
+ Uncomplete = isTodoType(I);
for (unsigned i = 0; i < Ref->getNumIncomingValues(); i++) {
Value *Op = Ref->getIncomingValue(i);
if (isPointerTy(Op->getType()))
@@ -792,14 +956,12 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
KnownElemTy = GR->findDeducedElementType(I);
if (!KnownElemTy)
return;
+ Uncomplete = isTodoType(I);
Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
} else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
- KnownElemTy = Ref->getSourceElementType();
- if (isUntypedPointerTy(KnownElemTy))
- return;
- Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
- if (PointeeTy && !isUntypedPointerTy(PointeeTy))
+ if (GR->findDeducedElementType(Ref->getPointerOperand()))
return;
+ KnownElemTy = Ref->getSourceElementType();
Ops.push_back(std::make_pair(Ref->getPointerOperand(),
GetElementPtrInst::getPointerOperandIndex()));
} else if (auto *Ref = dyn_cast<LoadInst>(I)) {
@@ -812,9 +974,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
Ops.push_back(std::make_pair(Ref->getPointerOperand(),
LoadInst::getPointerOperandIndex()));
} else if (auto *Ref = dyn_cast<StoreInst>(I)) {
- if (IsKernelArgInt8(Ref->getParent()->getParent(), Ref))
- return;
- if (!(KnownElemTy = reconstructType(GR, Ref->getValueOperand())))
+ if (!(KnownElemTy =
+ reconstructType(Ref->getValueOperand(), false, IsPostprocessing)))
return;
Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
if (PointeeTy && !isUntypedPointerTy(PointeeTy))
@@ -837,27 +998,54 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
if (!isPointerTy(I->getType()) ||
!(KnownElemTy = GR->findDeducedElementType(I)))
return;
+ Uncomplete = isTodoType(I);
for (unsigned i = 0; i < Ref->getNumOperands(); i++) {
Value *Op = Ref->getOperand(i);
if (isPointerTy(Op->getType()))
Ops.push_back(std::make_pair(Op, i));
}
} else if (auto *Ref = dyn_cast<ReturnInst>(I)) {
- Type *RetTy = F->getReturnType();
+ Type *RetTy = CurrF->getReturnType();
if (!isPointerTy(RetTy))
return;
Value *Op = Ref->getReturnValue();
if (!Op)
return;
- if (!(KnownElemTy = GR->findDeducedElementType(F))) {
+ if (!(KnownElemTy = GR->findDeducedElementType(CurrF))) {
if (Type *OpElemTy = GR->findDeducedElementType(Op)) {
- GR->addDeducedElementType(F, OpElemTy);
- TypedPointerType *DerivedTy =
- TypedPointerType::get(OpElemTy, getPointerAddressSpace(RetTy));
- GR->addReturnType(F, DerivedTy);
+ GR->addDeducedElementType(CurrF, OpElemTy);
+ GR->addReturnType(CurrF, TypedPointerType::get(
+ OpElemTy, getPointerAddressSpace(RetTy)));
+ // non-recursive update of types in function uses
+ DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+ std::make_pair(I, Op)};
+ for (User *U : CurrF->users()) {
+ CallInst *CI = dyn_cast<CallInst>(U);
+ if (!CI || CI->getCalledFunction() != CurrF)
+ continue;
+ if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(CI)) {
+ if (Type *PrevElemTy = GR->findDeducedElementType(CI)) {
+ updateAssignType(AssignCI, CI, PoisonValue::get(OpElemTy));
+ propagateElemType(CI, PrevElemTy, VisitedSubst);
+ }
+ }
+ }
+ TypeValidated.insert(I);
+ // Non-recursive update of types in the function uncomplete returns.
+ // This may happen just once per a function, the latch is a pair of
+ // findDeducedElementType(F) / addDeducedElementType(F, ...).
+ // With or without the latch it is a non-recursive call due to
+ // UncompleteRets set to nullptr in this call.
+ if (UncompleteRets)
+ for (Instruction *UncompleteRetI : *UncompleteRets)
+ deduceOperandElementType(UncompleteRetI, nullptr, AskOps,
+ IsPostprocessing);
+ } else if (UncompleteRets) {
+ UncompleteRets->insert(I);
}
return;
}
+ Uncomplete = isTodoType(CurrF);
Ops.push_back(std::make_pair(Op, 0));
} else if (auto *Ref = dyn_cast<ICmpInst>(I)) {
if (!isPointerTy(Ref->getOperand(0)->getType()))
@@ -868,37 +1056,53 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
Type *ElemTy1 = GR->findDeducedElementType(Op1);
if (ElemTy0) {
KnownElemTy = ElemTy0;
+ Uncomplete = isTodoType(Op0);
Ops.push_back(std::make_pair(Op1, 1));
} else if (ElemTy1) {
KnownElemTy = ElemTy1;
+ Uncomplete = isTodoType(Op1);
Ops.push_back(std::make_pair(Op0, 0));
}
} else if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (!CI->isIndirectCall())
- deduceOperandElementTypeCalledFunction(GR, I, InstrSet, CI, Ops,
- KnownElemTy);
+ deduceOperandElementTypeCalledFunction(CI, Ops, KnownElemTy);
else if (HaveFunPtrs)
- deduceOperandElementTypeFunctionPointer(GR, I, CI, Ops, KnownElemTy);
+ deduceOperandElementTypeFunctionPointer(CI, Ops, KnownElemTy,
+ IsPostprocessing);
}
// There is no enough info to deduce types or all is valid.
if (!KnownElemTy || Ops.size() == 0)
return;
- LLVMContext &Ctx = F->getContext();
+ LLVMContext &Ctx = CurrF->getContext();
IRBuilder<> B(Ctx);
for (auto &OpIt : Ops) {
Value *Op = OpIt.first;
- if (Op->use_empty() || (AskOp && Op != AskOp))
+ if (Op->use_empty())
continue;
- Type *Ty = AskOp ? AskTy : GR->findDeducedElementType(Op);
+ if (AskOps && !AskOps->contains(Op))
+ continue;
+ Type *AskTy = nullptr;
+ CallInst *AskCI = nullptr;
+ if (IsPostprocessing && AskOps) {
+ AskTy = GR->findDeducedElementType(Op);
+ AskCI = GR->findAssignPtrTypeInstr(Op);
+ assert(AskTy && AskCI);
+ }
+ Type *Ty = AskTy ? AskTy : GR->findDeducedElementType(Op);
if (Ty == KnownElemTy)
continue;
Value *OpTyVal = PoisonValue::get(KnownElemTy);
Type *OpTy = Op->getType();
- if (!Ty || AskTy || isUntypedPointerTy(Ty) ||
- UncompleteTypeInfo.contains(Op)) {
+ if (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)) {
+ Type *PrevElemTy = GR->findDeducedElementType(Op);
GR->addDeducedElementType(Op, KnownElemTy);
+ // check if KnownElemTy is complete
+ if (!Uncomplete)
+ eraseTodoType(Op);
+ else if (!IsPostprocessing)
+ insertTodoType(Op);
// check if there is existing Intrinsic::spv_assign_ptr_type instruction
CallInst *AssignCI = AskCI ? AskCI : GR->findAssignPtrTypeInstr(Op);
if (AssignCI == nullptr) {
@@ -910,31 +1114,21 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
GR->addAssignPtrTypeInstr(Op, CI);
} else {
updateAssignType(AssignCI, Op, OpTyVal);
+ DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+ std::make_pair(I, Op)};
+ propagateElemTypeRec(Op, KnownElemTy, PrevElemTy, VisitedSubst);
}
} else {
- if (auto *OpI = dyn_cast<Instruction>(Op)) {
- // spv_ptrcast's argument Op denotes an instruction that generates
- // a value, and we may use getInsertionPointAfterDef()
- B.SetInsertPoint(*OpI->getInsertionPointAfterDef());
- B.SetCurrentDebugLocation(OpI->getDebugLoc());
- } else if (auto *OpA = dyn_cast<Argument>(Op)) {
- B.SetInsertPointPastAllocas(OpA->getParent());
- B.SetCurrentDebugLocation(DebugLoc());
- } else {
- B.SetInsertPoint(F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
- }
- SmallVector<Type *, 2> Types = {OpTy, OpTy};
- SmallVector<Value *, 2> Args = {Op, buildMD(OpTyVal),
- B.getInt32(getPointerAddressSpace(OpTy))};
+ eraseTodoType(Op);
CallInst *PtrCastI =
- B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
+ buildSpvPtrcast(I->getParent()->getParent(), Op, KnownElemTy);
if (OpIt.second == std::numeric_limits<unsigned>::max())
dyn_cast<CallInst>(I)->setCalledOperand(PtrCastI);
else
I->setOperand(OpIt.second, PtrCastI);
- buildAssignPtr(B, KnownElemTy, PtrCastI);
}
}
+ TypeValidated.insert(I);
}
void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
@@ -961,7 +1155,7 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) {
std::queue<Instruction *> Worklist;
- for (auto &I : instructions(F))
+ for (auto &I : instructions(CurrF))
Worklist.push(&I);
while (!Worklist.empty()) {
@@ -989,7 +1183,7 @@ void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) {
void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) {
std::queue<Instruction *> Worklist;
- for (auto &I : instructions(F))
+ for (auto &I : instructions(CurrF))
Worklist.push(&I);
while (!Worklist.empty()) {
@@ -1048,7 +1242,7 @@ Instruction *SPIRVEmitIntrinsics::visitCallInst(CallInst &Call) {
return &Call;
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
- LLVMContext &Ctx = F->getContext();
+ LLVMContext &Ctx = CurrF->getContext();
Constant *TyC = UndefValue::get(IA->getFunctionType());
MDString *ConstraintString = MDString::get(Ctx, IA->getConstraintString());
@@ -1141,9 +1335,9 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
Type *VTy = V->getType();
// A couple of sanity checks.
- assert(isPointerTy(VTy) && "Expect a pointer type!");
- if (auto PType = dyn_cast<TypedPointerType>(VTy))
- if (PType->getElementType() != AssignedType)
+ assert((isPointerTy(VTy)) && "Expect a pointer type!");
+ if (Type *ElemTy = getPointeeType(VTy))
+ if (ElemTy != AssignedType)
report_fatal_error("Unexpected pointer element type!");
CallInst *AssignCI = GR->findAssignPtrTypeInstr(V);
@@ -1174,6 +1368,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
Instruction *I, Value *Pointer, Type *ExpectedElementType,
unsigned OperandToReplace, IRBuilder<> &B) {
+ TypeValidated.insert(I);
// If Pointer is the result of nop BitCastInst (ptr -> ptr), use the source
// pointer instead. The BitCastInst should be later removed when visited.
while (BitCastInst *BC = dyn_cast<BitCastInst>(Pointer))
@@ -1186,7 +1381,8 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
return;
setInsertPointSkippingPhis(B, I);
- MetadataAsValue *VMD = buildMD(PoisonValue::get(ExpectedElementType));
+ Value *ExpectedElementVal = PoisonValue::get(ExpectedElementType);
+ MetadataAsValue *VMD = buildMD(ExpectedElementVal);
unsigned AddressSpace = getPointerAddressSpace(Pointer->getType());
bool FirstPtrCastOrAssignPtrType = true;
@@ -1223,17 +1419,30 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
return;
}
- // // Do not emit spv_ptrcast if it would cast to the default pointer element
- // // type (i8) of the same address space.
- // if (ExpectedElementType->isIntegerTy(8))
- // return;
-
- // If this would be the first spv_ptrcast, do not emit spv_ptrcast and emit
- // spv_assign_ptr_type instead.
- if (FirstPtrCastOrAssignPtrType &&
- (isa<Instruction>(Pointer) || isa<Argument>(Pointer))) {
- buildAssignPtr(B, ExpectedElementType, Pointer);
- return;
+ if (isa<Instruction>(Pointer) || isa<Argument>(Pointer)) {
+ if (FirstPtrCastOrAssignPtrType) {
+ // If this would be the first spv_ptrcast, do not emit spv_ptrcast and
+ // emit spv_assign_ptr_type instead.
+ buildAssignPtr(B, ExpectedElementType, Pointer);
+ return;
+ } else if (isTodoType(Pointer)) {
+ eraseTodoType(Pointer);
+ if (!isa<CallInst>(Pointer) && !isa<GetElementPtrInst>(Pointer)) {
+ // If this wouldn't be the first spv_ptrcast but existing type info is
+ // uncomplete, update spv_assign_ptr_type arguments.
+ if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer)) {
+ Type *PrevElemTy = GR->findDeducedElementType(Pointer);
+ assert(PrevElemTy);
+ DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+ std::make_pair(I, Pointer)};
+ updateAssignType(AssignCI, Pointer, ExpectedElementVal);
+ propagateElemType(Pointer, PrevElemTy, VisitedSubst);
+ } else {
+ buildAssignPtr(B, ExpectedElementType, Pointer);
+ }
+ return;
+ }
+ }
}
// Emit spv_ptrcast
@@ -1249,27 +1458,48 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
IRBuilder<> &B) {
// Handle basic instructions:
StoreInst *SI = dyn_cast<StoreInst>(I);
- if (IsKernelArgInt8(F, SI)) {
- return replacePointerOperandWithPtrCast(
- I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0,
- B);
- } else if (SI) {
+ if (IsKernelArgInt8(CurrF, SI)) {
+ replacePointerOperandWithPtrCast(
+ I, SI->getValueOperand(), IntegerType::getInt8Ty(CurrF->getContext()),
+ 0, B);
+ }
+ if (SI) {
Value *Op = SI->getValueOperand();
+ Value *Pointer = SI->getPointerOperand();
Type *OpTy = Op->getType();
if (auto *OpI = dyn_cast<Instruction>(Op))
OpTy = restoreMutatedType(GR, OpI, OpTy);
if (OpTy == Op->getType())
OpTy = deduceElementTypeByValueDeep(OpTy, Op, false);
- return replacePointerOperandWithPtrCast(I, SI->getPointerOperand(), OpTy, 1,
- B);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- return replacePointerOperandWithPtrCast(I, LI->getPointerOperand(),
- LI->getType(), 0, B);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
- return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(),
- GEPI->getSourceElementType(), 0, B);
+ replacePointerOperandWithPtrCast(I, Pointer, OpTy, 1, B);
+ return;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Value *Pointer = LI->getPointerOperand();
+ Type *OpTy = LI->getType();
+ if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
+ if (Type *ElemTy = GR->findDeducedElementType(LI)) {
+ OpTy = getTypedPointerWrapper(ElemTy, PtrTy->getAddressSpace());
+ } else {
+ Type *NewOpTy = OpTy;
+ OpTy = deduceElementTypeByValueDeep(OpTy, LI, false);
+ if (OpTy == NewOpTy)
+ insertTodoType(Pointer);
+ }
+ }
+ replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
+ return;
+ }
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ Value *Pointer = GEPI->getPointerOperand();
+ Type *OpTy = GEPI->getSourceElementType();
+ replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
+ if (isNestedPointer(OpTy))
+ insertTodoType(Pointer);
+ return;
}
+ // TODO: review and merge with existing logics:
// Handle calls to builtins (non-intrinsics):
CallInst *CI = dyn_cast<CallInst>(I);
if (!CI || CI->isIndirectCall() || CI->isInlineAsm() ||
@@ -1287,8 +1517,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
Type *ArgType = CalledArg->getType();
if (!isPointerTy(ArgType)) {
CalledArgTys.push_back(nullptr);
- } else if (isTypedPointerTy(ArgType)) {
- CalledArgTys.push_back(cast<TypedPointerType>(ArgType)->getElementType());
+ } else if (Type *ArgTypeElem = getPointeeType(ArgType)) {
+ CalledArgTys.push_back(ArgTypeElem);
HaveTypes = true;
} else {
Type *ElemTy = GR->findDeducedElementType(CalledArg);
@@ -1338,7 +1568,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
if (!ExpectedType || ExpectedType->isVoidTy())
continue;
- if (ExpectedType->isTargetExtTy())
+ if (ExpectedType->isTargetExtTy() &&
+ !isTypedPointerWrapper(cast<TargetExtType>(ExpectedType)))
insertAssignPtrTypeTargetExt(cast<TargetExtType>(ExpectedType),
ArgOperand, B);
else
@@ -1419,7 +1650,7 @@ Instruction *SPIRVEmitIntrinsics::visitLoadInst(LoadInst &I) {
TrackConstants = false;
const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
MachineMemOperand::Flags Flags =
- TLI->getLoadMemOperandFlags(I, F->getDataLayout());
+ TLI->getLoadMemOperandFlags(I, CurrF->getDataLayout());
auto *NewI =
B.CreateIntrinsic(Intrinsic::spv_load, {I.getOperand(0)->getType()},
{I.getPointerOperand(), B.getInt16(Flags),
@@ -1436,7 +1667,7 @@ Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) {
TrackConstants = false;
const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
MachineMemOperand::Flags Flags =
- TLI->getStoreMemOperandFlags(I, F->getDataLayout());
+ TLI->getStoreMemOperandFlags(I, CurrF->getDataLayout());
auto *PtrOp = I.getPointerOperand();
auto *NewI = B.CreateIntrinsic(
Intrinsic::spv_store, {I.getValueOperand()->getType(), PtrOp->getType()},
@@ -1602,8 +1833,9 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
GR->addAssignPtrTypeInstr(Op, AssignCI);
} else if (!isa<Instruction>(Op)) {
Type *OpTy = Op->getType();
- if (auto PType = dyn_cast<TypedPointerType>(OpTy)) {
- buildAssignPtr(B, PType->getElementType(), Op);
+ Type *OpTyElem = getPointeeType(OpTy);
+ if (OpTyElem) {
+ buildAssignPtr(B, OpTyElem, Op);
} else if (isPointerTy(OpTy)) {
Type *ElemTy = GR->findDeducedElementType(Op);
buildAssignPtr(B, ElemTy ? ElemTy : deduceElementType(Op, true), Op);
@@ -1742,9 +1974,44 @@ void SPIRVEmitIntrinsics::processParamTypesByFunHeader(Function *F,
if (!isUntypedPointerTy(Arg->getType()))
continue;
Type *ElemTy = GR->findDeducedElementType(Arg);
- if (!ElemTy && hasPointeeTypeAttr(Arg) &&
- (ElemTy = getPointeeTypeByAttr(Arg)) != nullptr)
+ if (ElemTy)
+ continue;
+ if (hasPointeeTypeAttr(Arg) &&
+ (ElemTy = getPointeeTypeByAttr(Arg)) != nullptr) {
+ buildAssignPtr(B, ElemTy, Arg);
+ continue;
+ }
+ // search in function's call sites
+ for (User *U : F->users()) {
+ CallInst *CI = dyn_cast<CallInst>(U);
+ if (!CI || OpIdx >= CI->arg_size())
+ continue;
+ Value *OpArg = CI->getArgOperand(OpIdx);
+ if (!isPointerTy(OpArg->getType()))
+ continue;
+ // maybe we already know operand's element type
+ if ((ElemTy = GR->findDeducedElementType(OpArg)) != nullptr)
+ break;
+ }
+ if (ElemTy) {
buildAssignPtr(B, ElemTy, Arg);
+ continue;
+ }
+ if (HaveFunPtrs) {
+ for (User *U : Arg->users()) {
+ CallInst *CI = dyn_cast<CallInst>(U);
+ if (CI && !isa<IntrinsicInst>(CI) && CI->isIndirectCall() &&
+ CI->getCalledOperand() == Arg &&
+ CI->getParent()->getParent() == CurrF) {
+ SmallVector<std::pair<Value *, unsigned>> Ops;
+ deduceOperandElementTypeFunctionPointer(CI, Ops, ElemTy, false);
+ if (ElemTy) {
+ buildAssignPtr(B, ElemTy, Arg);
+ break;
+ }
+ }
+ }
+ }
}
}
@@ -1770,7 +2037,7 @@ static FunctionType *getFunctionPointerElemType(Function *F,
if (ArgTy->isPointerTy())
if (Type *ElemTy = GR->findDeducedElementType(&Arg)) {
IsNewFTy = true;
- ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy));
+ ArgTy = getTypedPointerWrapper(ElemTy, getPointerAddressSpace(ArgTy));
}
ArgTys.push_back(ArgTy);
}
@@ -1845,17 +2112,17 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
InstrSet = ST.isOpenCLEnv() ? SPIRV::InstructionSet::OpenCL_std
: SPIRV::InstructionSet::GLSL_std_450;
- if (!F)
+ if (!CurrF)
HaveFunPtrs =
ST.canUseExtension(SPIRV::Extension::SPV_INTEL_function_pointers);
- F = &Func;
+ CurrF = &Func;
IRBuilder<> B(Func.getContext());
AggrConsts.clear();
AggrConstTypes.clear();
AggrStores.clear();
- processParamTypesByFunHeader(F, B);
+ processParamTypesByFunHeader(CurrF, B);
// StoreInst's operand type can be changed during the next transformations,
// so we need to store it in the set. Also store already transformed types.
@@ -1878,6 +2145,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
for (auto &I : instructions(Func))
Worklist.push_back(&I);
+ // Pass forward: use operand to deduce instructions result.
for (auto &I : Worklist) {
// Don't emit intrinsincs for convergence intrinsics.
if (isConvergenceIntrinsic(I))
@@ -1894,8 +2162,18 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
insertAssignPtrTypeIntrs(I, B, true);
}
- for (auto &I : instructions(Func))
- deduceOperandElementType(&I);
+ // Pass backward: use instructions results to specify/update/cast operands
+ // where needed.
+ SmallPtrSet<Instruction *, 4> UncompleteRets;
+ for (auto &I : llvm::reverse(instructions(Func)))
+ deduceOperandElementType(&I, &UncompleteRets);
+
+ // Pass forward for PHIs only, their operands are not preceed the instruction
+ // in meaning of `instructions(Func)`.
+ for (BasicBlock &BB : Func)
+ for (PHINode &Phi : BB.phis())
+ if (isPointerTy(Phi.getType()))
+ deduceOperandElementType(&Phi, nullptr);
for (auto *I : Worklist) {
TrackConstants = true;
@@ -1917,83 +2195,83 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
return true;
}
-void SPIRVEmitIntrinsics::replaceWithPtrcasted(Instruction *CI, Type *NewElemTy,
- Type *KnownElemTy,
- CallInst *AssignCI) {
- updateAssignType(AssignCI, CI, PoisonValue::get(NewElemTy));
- IRBuilder<> B(CI->getContext());
- B.SetInsertPoint(*CI->getInsertionPointAfterDef());
- B.SetCurrentDebugLocation(CI->getDebugLoc());
- Type *OpTy = CI->getType();
- SmallVector<Type *, 2> Types = {OpTy, OpTy};
- SmallVector<Value *, 2> Args = {CI, buildMD(PoisonValue::get(KnownElemTy)),
- B.getInt32(getPointerAddressSpace(OpTy))};
- CallInst *PtrCasted =
- B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
- SmallVector<User *> Users(CI->users());
- for (auto *U : Users)
- if (U != AssignCI && U != PtrCasted)
- U->replaceUsesOfWith(CI, PtrCasted);
- buildAssignPtr(B, KnownElemTy, PtrCasted);
-}
-
// Try to deduce a better type for pointers to untyped ptr.
-bool SPIRVEmitIntrinsics::postprocessTypes() {
- bool Changed = false;
- if (!GR)
- return Changed;
- for (auto IB = PostprocessWorklist.rbegin(), IE = PostprocessWorklist.rend();
- IB != IE; ++IB) {
- CallInst *AssignCI = GR->findAssignPtrTypeInstr(*IB);
- Type *KnownTy = GR->findDeducedElementType(*IB);
- if (!KnownTy || !AssignCI || !isa<Instruction>(AssignCI->getArgOperand(0)))
+bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
+ if (!GR || TodoTypeSz == 0)
+ return false;
+
+ unsigned SzTodo = TodoTypeSz;
+ DenseMap<Value *, SmallPtrSet<Value *, 4>> ToProcess;
+ for (auto [Op, Enabled] : TodoType) {
+ // TODO: add isa<CallInst>(Op) to continue
+ if (!Enabled || isa<GetElementPtrInst>(Op))
+ continue;
+ CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op);
+ Type *KnownTy = GR->findDeducedElementType(Op);
+ if (!KnownTy || !AssignCI)
continue;
+ assert(Op == AssignCI->getArgOperand(0));
// Try to improve the type deduced after all Functions are processed.
- if (auto *CI = dyn_cast<CallInst>(*IB)) {
- if (Function *CalledF = CI->getCalledFunction()) {
- Type *RetElemTy = GR->findDeducedElementType(CalledF);
- // Fix inconsistency between known type and function's return type.
- if (RetElemTy && RetElemTy != KnownTy) {
- replaceWithPtrcasted(CI, RetElemTy, KnownTy, AssignCI);
- Changed = true;
+ if (auto *CI = dyn_cast<Instruction>(Op)) {
+ CurrF = CI->getParent()->getParent();
+ std::unordered_set<Value *> Visited;
+ if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) {
+ if (ElemTy != KnownTy) {
+ DenseSet<std::pair<Value *, Value *>> VisitedSubst;
+ propagateElemType(CI, ElemTy, VisitedSubst);
+ eraseTodoType(Op);
continue;
}
}
}
- Instruction *I = cast<Instruction>(AssignCI->getArgOperand(0));
- for (User *U : I->users()) {
+ for (User *U : Op->users()) {
Instruction *Inst = dyn_cast<Instruction>(U);
- if (!Inst || isa<IntrinsicInst>(Inst))
+ if (Inst && !isa<IntrinsicInst>(Inst))
+ ToProcess[Inst].insert(Op);
+ }
+ }
+ if (TodoTypeSz == 0)
+ return true;
+
+ for (auto &F : M) {
+ CurrF = &F;
+ SmallPtrSet<Instruction *, 4> UncompleteRets;
+ for (auto &I : llvm::reverse(instructions(F))) {
+ auto It = ToProcess.find(&I);
+ if (It == ToProcess.end())
continue;
- deduceOperandElementType(Inst, I, KnownTy, AssignCI);
- if (KnownTy != GR->findDeducedElementType(I)) {
- Changed = true;
- break;
- }
+ It->second.remove_if([this](Value *V) { return !isTodoType(V); });
+ if (It->second.size() == 0)
+ continue;
+ deduceOperandElementType(&I, &UncompleteRets, &It->second, true);
+ if (TodoTypeSz == 0)
+ return true;
}
}
- return Changed;
+
+ return SzTodo > TodoTypeSz;
}
bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
bool Changed = false;
- UncompleteTypeInfo.clear();
- PostprocessWorklist.clear();
+ TodoType.clear();
for (auto &F : M)
Changed |= runOnFunction(F);
+ // Specify function parameters after all functions were processed.
for (auto &F : M) {
// check if function parameter types are set
+ CurrF = &F;
if (!F.isDeclaration() && !F.isIntrinsic()) {
- const SPIRVSubtarget &ST = TM->getSubtarget<SPIRVSubtarget>(F);
- GR = ST.getSPIRVGlobalRegistry();
IRBuilder<> B(F.getContext());
processParamTypes(&F, B);
}
}
- Changed |= postprocessTypes();
+ CanTodoType = false;
+ Changed |= postprocessTypes(M);
+
if (HaveFunPtrs)
Changed |= processFunctionPointers(M);
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 6f222883ee07de..9ac659f6b4f111 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -28,6 +28,18 @@
#include <functional>
using namespace llvm;
+
+inline unsigned typeToAddressSpace(const Type *Ty) {
+ if (auto PType = dyn_cast<TypedPointerType>(Ty))
+ return PType->getAddressSpace();
+ if (auto PType = dyn_cast<PointerType>(Ty))
+ return PType->getAddressSpace();
+ if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
+ ExtTy && isTypedPointerWrapper(ExtTy))
+ return ExtTy->getIntParameter(0);
+ report_fatal_error("Unable to convert LLVM type to SPIRVType", true);
+}
+
SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize)
: PointerSize(PointerSize), Bound(0) {}
@@ -69,7 +81,7 @@ SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg(
void SPIRVGlobalRegistry::assignSPIRVTypeToVReg(SPIRVType *SpirvType,
Register VReg,
- MachineFunction &MF) {
+ const MachineFunction &MF) {
VRegToTypeMap[&MF][VReg] = SpirvType;
}
@@ -570,15 +582,15 @@ Register
SPIRVGlobalRegistry::getOrCreateConstNullPtr(MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType) {
const Type *LLVMTy = getTypeForSPIRVType(SpvType);
- const TypedPointerType *LLVMPtrTy = cast<TypedPointerType>(LLVMTy);
+ unsigned AddressSpace = typeToAddressSpace(LLVMTy);
// Find a constant in DT or build a new one.
- Constant *CP = ConstantPointerNull::get(PointerType::get(
- LLVMPtrTy->getElementType(), LLVMPtrTy->getAddressSpace()));
+ Constant *CP = ConstantPointerNull::get(
+ PointerType::get(::getPointeeType(LLVMTy), AddressSpace));
Register Res = DT.find(CP, CurMF);
if (!Res.isValid()) {
- LLT LLTy = LLT::pointer(LLVMPtrTy->getAddressSpace(), PointerSize);
+ LLT LLTy = LLT::pointer(AddressSpace, PointerSize);
Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
- CurMF->getRegInfo().setRegClass(Res, &SPIRV::iIDRegClass);
+ CurMF->getRegInfo().setRegClass(Res, &SPIRV::pIDRegClass);
assignSPIRVTypeToVReg(SpvType, Res, *CurMF);
MIRBuilder.buildInstr(SPIRV::OpConstantNull)
.addDef(Res)
@@ -978,18 +990,11 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(
}
return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder);
}
- unsigned AddrSpace = 0xFFFF;
- if (auto PType = dyn_cast<TypedPointerType>(Ty))
- AddrSpace = PType->getAddressSpace();
- else if (auto PType = dyn_cast<PointerType>(Ty))
- AddrSpace = PType->getAddressSpace();
- else
- report_fatal_error("Unable to convert LLVM type to SPIRVType", true);
+ unsigned AddrSpace = typeToAddressSpace(Ty);
SPIRVType *SpvElementType = nullptr;
- if (auto PType = dyn_cast<TypedPointerType>(Ty))
- SpvElementType = getOrCreateSPIRVType(PType->getElementType(), MIRBuilder,
- AccQual, EmitIR);
+ if (Type *ElemTy = ::getPointeeType(Ty))
+ SpvElementType = getOrCreateSPIRVType(ElemTy, MIRBuilder, AccQual, EmitIR);
else
SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder);
@@ -1029,7 +1034,11 @@ SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
// will be added later. For special types it is already added to DT.
if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() &&
!isSpecialOpaqueType(Ty)) {
- if (!isPointerTy(Ty))
+ if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
+ ExtTy && isTypedPointerWrapper(ExtTy))
+ DT.add(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0),
+ &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
+ else if (!isPointerTy(Ty))
DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
else if (isTypedPointerTy(Ty))
DT.add(cast<TypedPointerType>(Ty)->getElementType(),
@@ -1065,7 +1074,11 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
const Type *Ty, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
Register Reg;
- if (!isPointerTy(Ty)) {
+ if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
+ ExtTy && isTypedPointerWrapper(ExtTy)) {
+ Reg = DT.find(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0),
+ &MIRBuilder.getMF());
+ } else if (!isPointerTy(Ty)) {
Ty = adjustIntTypeByWidth(Ty);
Reg = DT.find(Ty, &MIRBuilder.getMF());
} else if (isTypedPointerTy(Ty)) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 3bb86e8be69500..ff4b0ea8757fa4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -330,7 +330,7 @@ class SPIRVGlobalRegistry {
// In cases where the SPIR-V type is already known, this function can be
// used to map it to the given VReg via an ASSIGN_TYPE instruction.
void assignSPIRVTypeToVReg(SPIRVType *Type, Register VReg,
- MachineFunction &MF);
+ const MachineFunction &MF);
// Either generate a new OpTypeXXX instruction or return an existing one
// corresponding to the given LLVM IR type.
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index 59a1bf50b771b9..d5b81bf46c804e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -111,8 +111,8 @@ static void doInsertBitcast(const SPIRVSubtarget &STI, MachineRegisterInfo *MRI,
SPIRVGlobalRegistry &GR, MachineInstr &I,
Register OpReg, unsigned OpIdx,
SPIRVType *NewPtrType) {
- Register NewReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
MachineIRBuilder MIB(I);
+ Register NewReg = createVirtualRegister(NewPtrType, &GR, MRI, MIB.getMF());
bool Res = MIB.buildInstr(SPIRV::OpBitcast)
.addDef(NewReg)
.addUse(GR.getSPIRVTypeID(NewPtrType))
@@ -121,8 +121,6 @@ static void doInsertBitcast(const SPIRVSubtarget &STI, MachineRegisterInfo *MRI,
*STI.getRegBankInfo());
if (!Res)
report_fatal_error("insert validation bitcast: cannot constrain all uses");
- MRI->setRegClass(NewReg, &SPIRV::iIDRegClass);
- GR.assignSPIRVTypeToVReg(NewPtrType, NewReg, MIB.getMF());
I.getOperand(OpIdx).setReg(NewReg);
}
@@ -396,6 +394,7 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const {
case SPIRV::OpGenericCastToPtr:
validateAccessChain(STI, MRI, GR, MI);
break;
+ case SPIRV::OpPtrAccessChain:
case SPIRV::OpInBoundsPtrAccessChain:
if (MI.getNumOperands() == 4)
validateAccessChain(STI, MRI, GR, MI);
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index ec1e13a90971ba..90898b8bd72503 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -357,12 +357,13 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
verify(*ST.getInstrInfo());
}
-static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType,
+static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpvType,
LegalizerHelper &Helper,
MachineRegisterInfo &MRI,
SPIRVGlobalRegistry *GR) {
Register ConvReg = MRI.createGenericVirtualRegister(ConvTy);
- GR->assignSPIRVTypeToVReg(SpirvType, ConvReg, Helper.MIRBuilder.getMF());
+ MRI.setRegClass(ConvReg, GR->getRegClass(SpvType));
+ GR->assignSPIRVTypeToVReg(SpvType, ConvReg, Helper.MIRBuilder.getMF());
Helper.MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
.addDef(ConvReg)
.addUse(Reg);
diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
index 11b9e4f6f6d17b..3373d8e24dab48 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
@@ -102,10 +102,7 @@ static void processNewInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
if (!ResType) {
// There was no "assign type" actions, let's fix this now
ResType = ScalarType;
- MRI.setRegClass(ResVReg, &SPIRV::iIDRegClass);
- MRI.setType(ResVReg,
- LLT::scalar(GR->getScalarOrVectorBitWidth(ResType)));
- GR->assignSPIRVTypeToVReg(ResType, ResVReg, *GR->CurMF);
+ setRegClassType(ResVReg, ResType, GR, &MRI, *GR->CurMF, true);
}
}
} else if (mayBeInserted(Opcode) && I.getNumDefs() == 1 &&
@@ -124,9 +121,7 @@ static void processNewInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
if (!ResVType)
continue;
// Set type & class
- MRI.setRegClass(ResVReg, GR->getRegClass(ResVType));
- MRI.setType(ResVReg, GR->getRegType(ResVType));
- GR->assignSPIRVTypeToVReg(ResVType, ResVReg, *GR->CurMF);
+ setRegClassType(ResVReg, ResVType, GR, &MRI, *GR->CurMF, true);
}
// If this is a simple operation that is to be reduced by TableGen
// definition we must apply some of pre-legalizer rules here
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index f899b8b67affe8..1ece3044aaa7bb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -13,6 +13,7 @@
#include "SPIRVUtils.h"
#include "MCTargetDesc/SPIRVBaseInfo.h"
#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
#include "SPIRVInstrInfo.h"
#include "SPIRVSubtarget.h"
#include "llvm/ADT/StringRef.h"
@@ -21,6 +22,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include <queue>
#include <vector>
@@ -405,8 +407,10 @@ bool hasBuiltinTypePrefix(StringRef Name) {
}
bool isSpecialOpaqueType(const Type *Ty) {
- if (const TargetExtType *EType = dyn_cast<TargetExtType>(Ty))
- return hasBuiltinTypePrefix(EType->getName());
+ if (const TargetExtType *ExtTy = dyn_cast<TargetExtType>(Ty))
+ return isTypedPointerWrapper(ExtTy)
+ ? false
+ : hasBuiltinTypePrefix(ExtTy->getName());
return false;
}
@@ -684,4 +688,77 @@ bool getVacantFunctionName(Module &M, std::string &Name) {
return false;
}
+// Assign SPIR-V type to the register. If the register has no valid assigned
+// class, set register LLT type and class according to the SPIR-V type.
+void setRegClassType(Register Reg, SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+ MachineRegisterInfo *MRI, const MachineFunction &MF,
+ bool Force) {
+ GR->assignSPIRVTypeToVReg(SpvType, Reg, MF);
+ if (!MRI->getRegClassOrNull(Reg) || Force) {
+ MRI->setRegClass(Reg, GR->getRegClass(SpvType));
+ MRI->setType(Reg, GR->getRegType(SpvType));
+ }
+}
+
+// Create a SPIR-V type, assign SPIR-V type to the register. If the register has
+// no valid assigned class, set register LLT type and class according to the
+// SPIR-V type.
+void setRegClassType(Register Reg, const Type *Ty, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIRBuilder, bool Force) {
+ setRegClassType(Reg, GR->getOrCreateSPIRVType(Ty, MIRBuilder), GR,
+ MIRBuilder.getMRI(), MIRBuilder.getMF(), Force);
+}
+
+// Create a virtual register and assign SPIR-V type to the register. Set
+// register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+ MachineRegisterInfo *MRI,
+ const MachineFunction &MF) {
+ Register Reg = MRI->createVirtualRegister(GR->getRegClass(SpvType));
+ MRI->setType(Reg, GR->getRegType(SpvType));
+ GR->assignSPIRVTypeToVReg(SpvType, Reg, MF);
+ return Reg;
+}
+
+// Create a virtual register and assign SPIR-V type to the register. Set
+// register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIRBuilder) {
+ return createVirtualRegister(SpvType, GR, MIRBuilder.getMRI(),
+ MIRBuilder.getMF());
+}
+
+// Create a SPIR-V type, virtual register and assign SPIR-V type to the
+// register. Set register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIRBuilder) {
+ return createVirtualRegister(GR->getOrCreateSPIRVType(Ty, MIRBuilder), GR,
+ MIRBuilder);
+}
+
+// Return true if there is an opaque pointer type nested in the argument.
+bool isNestedPointer(const Type *Ty) {
+ if (Ty->isPtrOrPtrVectorTy())
+ return true;
+ if (const FunctionType *RefTy = dyn_cast<FunctionType>(Ty)) {
+ if (isNestedPointer(RefTy->getReturnType()))
+ return true;
+ for (const Type *ArgTy : RefTy->params())
+ if (isNestedPointer(ArgTy))
+ return true;
+ return false;
+ }
+ if (const ArrayType *RefTy = dyn_cast<ArrayType>(Ty))
+ return isNestedPointer(RefTy->getElementType());
+ return false;
+}
+
+bool isSpvIntrinsic(const Value *Arg) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(Arg))
+ if (Function *F = II->getCalledFunction())
+ if (F->getName().starts_with("llvm.spv."))
+ return true;
+ return false;
+}
+
} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index d218dbd850dc7a..c0569549039d5c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -34,6 +34,7 @@ class Register;
class StringRef;
class SPIRVInstrInfo;
class SPIRVSubtarget;
+class SPIRVGlobalRegistry;
// This class implements a partial ordering visitor, which visits a cyclic graph
// in natural topological-like ordering. Topological ordering is not defined for
@@ -198,6 +199,8 @@ uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI);
// Check if MI is a SPIR-V specific intrinsic call.
bool isSpvIntrinsic(const MachineInstr &MI, Intrinsic::ID IntrinsicID);
+// Check if the value is a call to a SPIR-V specific intrinsic (llvm.spv.*).
+bool isSpvIntrinsic(const Value *Arg);
// Get type of i-th operand of the metadata node.
Type *getMDOperandAsType(const MDNode *N, unsigned I);
@@ -276,12 +279,19 @@ inline Type *getTypedPointerWrapper(Type *ElemTy, unsigned AS) {
{ElemTy}, {AS});
}
-inline bool isTypedPointerWrapper(TargetExtType *ExtTy) {
+inline bool isTypedPointerWrapper(const TargetExtType *ExtTy) {
return ExtTy->getName() == TYPED_PTR_TARGET_EXT_NAME &&
ExtTy->getNumIntParameters() == 1 &&
ExtTy->getNumTypeParameters() == 1;
}
+// True if this is a pointer type, a typed pointer type, or a typed pointer target extension wrapper.
+inline bool isPointerTyOrWrapper(const Type *Ty) {
+ if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
+ return isTypedPointerWrapper(ExtTy);
+ return isPointerTy(Ty);
+}
+
inline Type *applyWrappers(Type *Ty) {
if (auto *ExtTy = dyn_cast<TargetExtType>(Ty)) {
if (isTypedPointerWrapper(ExtTy))
@@ -296,12 +306,14 @@ inline Type *applyWrappers(Type *Ty) {
return Ty;
}
-inline Type *getPointeeType(Type *Ty) {
- if (auto PType = dyn_cast<TypedPointerType>(Ty))
- return PType->getElementType();
- else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
- if (isTypedPointerWrapper(ExtTy))
- return applyWrappers(ExtTy->getTypeParameter(0));
+inline Type *getPointeeType(const Type *Ty) {
+ if (Ty) {
+ if (auto PType = dyn_cast<TypedPointerType>(Ty))
+ return PType->getElementType();
+ else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
+ if (isTypedPointerWrapper(ExtTy))
+ return ExtTy->getTypeParameter(0);
+ }
return nullptr;
}
@@ -360,5 +372,23 @@ MachineInstr *getVRegDef(MachineRegisterInfo &MRI, Register Reg);
#define SPIRV_BACKEND_SERVICE_FUN_NAME "__spirv_backend_service_fun"
bool getVacantFunctionName(Module &M, std::string &Name);
+void setRegClassType(Register Reg, const Type *Ty, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIRBuilder, bool Force = false);
+void setRegClassType(Register Reg, const MachineInstr *SpvType,
+ SPIRVGlobalRegistry *GR, MachineRegisterInfo *MRI,
+ const MachineFunction &MF, bool Force = false);
+Register createVirtualRegister(const MachineInstr *SpvType,
+ SPIRVGlobalRegistry *GR,
+ MachineRegisterInfo *MRI,
+ const MachineFunction &MF);
+Register createVirtualRegister(const MachineInstr *SpvType,
+ SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIRBuilder);
+Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIRBuilder);
+
+// Return true if there is an opaque pointer type nested in the argument.
+bool isNestedPointer(const Type *Ty);
+
} // namespace llvm
#endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll
index 3ebfa1d8c8a9d9..6aeb29df9f7bd4 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll
@@ -1,7 +1,6 @@
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s
; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-; CHECK-DAG: OpCapability Int8
; CHECK-DAG: OpCapability FunctionPointersINTEL
; CHECK-DAG: OpCapability Int64
; CHECK: OpExtension "SPV_INTEL_function_pointers"
@@ -9,19 +8,14 @@
; CHECK-DAG: %[[TyVoid:.*]] = OpTypeVoid
; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0
; CHECK-DAG: %[[TyFun:.*]] = OpTypeFunction %[[TyInt64]] %[[TyInt64]]
-; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0
; CHECK-DAG: %[[TyPtrFunCodeSection:.*]] = OpTypePointer CodeSectionINTEL %[[TyFun]]
; CHECK-DAG: %[[ConstFunFp:.*]] = OpConstantFunctionPointerINTEL %[[TyPtrFunCodeSection]] %[[DefFunFp:.*]]
; CHECK-DAG: %[[TyPtrFun:.*]] = OpTypePointer Function %[[TyFun]]
; CHECK-DAG: %[[TyPtrPtrFun:.*]] = OpTypePointer Function %[[TyPtrFun]]
-; CHECK-DAG: %[[TyPtrInt8:.*]] = OpTypePointer Function %[[TyInt8]]
-; CHECK-DAG: %[[TyPtrPtrInt8:.*]] = OpTypePointer Function %[[TyPtrInt8]]
; CHECK: OpFunction
-; CHECK: %[[Var:.*]] = OpVariable %[[TyPtrPtrInt8]] Function
-; CHECK: %[[SAddr:.*]] = OpBitcast %[[TyPtrPtrFun]] %[[Var]]
-; CHECK: OpStore %[[SAddr]] %[[ConstFunFp]]
-; CHECK: %[[LAddr:.*]] = OpBitcast %[[TyPtrPtrFun]] %[[Var]]
-; CHECK: %[[FP:.*]] = OpLoad %[[TyPtrFun]] %[[LAddr]]
+; CHECK: %[[Var:.*]] = OpVariable %[[TyPtrPtrFun]] Function
+; CHECK: OpStore %[[Var]] %[[ConstFunFp]]
+; CHECK: %[[FP:.*]] = OpLoad %[[TyPtrFun]] %[[Var]]
; CHECK: OpFunctionPointerCallINTEL %[[TyInt64]] %[[FP]] %[[#]]
; CHECK: OpFunctionEnd
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
index eb7b1dffaee501..9fa46f50a2e89b 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s
; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; CHECK-DAG: OpCapability Int8
@@ -12,13 +12,15 @@
; CHECK-DAG: OpName %[[test:.*]] "test"
; CHECK-DAG: %[[TyVoid:.*]] = OpTypeVoid
; CHECK-DAG: %[[TyFloat32:.*]] = OpTypeFloat 32
-; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0
; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0
+; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0
; CHECK-DAG: %[[TyPtrInt8:.*]] = OpTypePointer Function %[[TyInt8]]
-; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrInt8]]
-; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]]
-; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrFp]] %[[TyPtrInt8]]
+; CHECK-DAG: %[[TyUncompleteFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrInt8]]
+; CHECK-DAG: %[[TyPtrUncompleteFp:.*]] = OpTypePointer Function %[[TyUncompleteFp]]
+; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrUncompleteFp]] %[[TyPtrInt8]]
; CHECK-DAG: %[[TyPtrBar:.*]] = OpTypePointer Function %[[TyBar]]
+; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrBar]]
+; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]]
; CHECK-DAG: %[[TyTest:.*]] = OpTypeFunction %[[TyVoid]] %[[TyPtrFp]] %[[TyPtrInt8]] %[[TyPtrBar]]
; CHECK: %[[test]] = OpFunction %[[TyVoid]] None %[[TyTest]]
; CHECK: %[[fp]] = OpFunctionParameter %[[TyPtrFp]]
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
index 9374e154a0239f..13667f44389e7b 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
@@ -37,7 +37,8 @@
; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
-; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - -filetype=obj | spirv-val %}
; CHECK-ERROR: LLVM ERROR: intel_sub_group_shuffle: the builtin requires the following SPIR-V extension: SPV_INTEL_subgroups
diff --git a/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll b/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll
index bf71eb5628e217..82b115c77987f9 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll
@@ -13,16 +13,16 @@
; CHECK-DAG: %[[Struct2:.*]] = OpTypeStruct %[[Struct1]]
; CHECK-DAG: %[[StructPtr:.*]] = OpTypePointer Function %[[Struct2]]
; CHECK-DAG: %[[Bool:.*]] = OpTypeBool
-; CHECK-DAG: %[[FooType:.*]] = OpTypeFunction %[[StructPtr:.*]] %[[StructPtr]] %[[StructPtr]] %[[Bool]]
+; CHECK-DAG: %[[FooType:.*]] = OpTypeFunction %[[StructPtr]] %[[StructPtr]] %[[StructPtr]] %[[Bool]]
; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0
; CHECK-DAG: %[[CharPtr:.*]] = OpTypePointer Function %[[Char]]
; CHECK: %[[Test1]] = OpFunction
-; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Foo]]
-; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Bar]]
+; CHECK: OpFunctionCall %[[StructPtr]] %[[Foo]]
+; CHECK: OpFunctionCall %[[CharPtr]] %[[Bar]]
; CHECK: OpFunctionEnd
-; CHECK: %[[Foo]] = OpFunction %[[StructPtr:.*]] None %[[FooType]]
+; CHECK: %[[Foo]] = OpFunction %[[StructPtr]] None %[[FooType]]
; CHECK: %[[Arg1:.*]] = OpFunctionParameter %[[StructPtr]]
; CHECK: %[[Arg2:.*]] = OpFunctionParameter
; CHECK: %[[Sw:.*]] = OpFunctionParameter
@@ -30,17 +30,18 @@
; CHECK: OpReturnValue %[[Res]]
; CHECK: OpReturnValue %[[Arg2]]
-; CHECK: %[[Bar]] = OpFunction %[[StructPtr:.*]] None %[[#]]
-; CHECK: %[[BarArg:.*]] = OpFunctionParameter
-; CHECK: %[[BarRes:.*]] = OpInBoundsPtrAccessChain %[[CharPtr]] %[[BarArg]] %[[#]]
-; CHECK: %[[BarResCasted:.*]] = OpBitcast %[[StructPtr]] %[[BarRes]]
+; CHECK: %[[Bar]] = OpFunction %[[CharPtr]] None %[[#]]
+; CHECK: %[[BarArg:.*]] = OpFunctionParameter %[[StructPtr]]
+; CHECK: %[[BarArgCasted:.*]] = OpBitcast %[[CharPtr]] %[[BarArg]]
+; CHECK: %[[BarRes:.*]] = OpInBoundsPtrAccessChain %[[CharPtr]] %[[BarArgCasted]] %[[#]]
; CHECK: %[[BarResStruct:.*]] = OpInBoundsPtrAccessChain %[[StructPtr]] %[[#]] %[[#]]
-; CHECK: OpReturnValue %[[BarResStruct]]
-; CHECK: OpReturnValue %[[BarResCasted]]
+; CHECK: %[[BarResStructCasted:.*]] = OpBitcast %[[CharPtr]] %[[BarResStruct]]
+; CHECK: OpReturnValue %[[BarResStructCasted]]
+; CHECK: OpReturnValue %[[BarRes]]
; CHECK: %[[Test2]] = OpFunction
-; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Foo]]
-; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Bar]]
+; CHECK: OpFunctionCall %[[StructPtr]] %[[Foo]]
+; CHECK: OpFunctionCall %[[CharPtr]] %[[Bar]]
; CHECK: OpFunctionEnd
%struct = type { %array }
diff --git a/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll b/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll
new file mode 100644
index 00000000000000..a846e1936d7ac5
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll
@@ -0,0 +1,50 @@
+; The goal of the test case is to ensure that correct types are applied to virtual registers which were
+; used as return values in call lowering. Pass criterion is that spirv-val considers output valid.
+
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+%t_half = type { half }
+%t_i17 = type { [17 x i32] }
+%t_h17 = type { [17 x %t_half] }
+
+define internal spir_func void @foo(i64 %arrayinit.cur.add_4, half %r1, ptr addrspace(4) noundef align 8 dereferenceable_or_null(72) %this) {
+entry:
+ %r_3 = alloca %t_h17, align 8
+ %p_src = alloca %t_i17, align 4
+ %p_src4 = addrspacecast ptr %p_src to ptr addrspace(4)
+ %call_2 = call spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef %p_src4, i32 noundef 7)
+ br label %l_body
+
+l_body: ; preds = %l_body, %entry
+ %l_done = icmp eq i64 %arrayinit.cur.add_4, 34
+ br i1 %l_done, label %exit, label %l_body
+
+exit: ; preds = %l_body
+ %0 = addrspacecast ptr %call_2 to ptr addrspace(4)
+ %call_6 = call spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef %0, i32 noundef 7)
+ br label %for.cond_3
+
+for.cond_3: ; preds = %for.body_3, %exit
+ %lsr.iv1 = phi ptr [ %scevgep2, %for.body_3 ], [ %call_6, %exit ]
+ %lsr.iv = phi ptr [ %scevgep, %for.body_3 ], [ %r_3, %exit ]
+ %i.0_3 = phi i64 [ 0, %exit ], [ %inc_3, %for.body_3 ]
+ %cmp_3 = icmp ult i64 %i.0_3, 17
+ br i1 %cmp_3, label %for.body_3, label %exit2
+
+for.body_3: ; preds = %for.cond_3
+ %call2_5 = call spir_func noundef half @_Z17__spirv_ocl_frexpDF16_PU3AS0i(half noundef %r1, ptr noundef %lsr.iv1)
+ store half %call2_5, ptr %lsr.iv, align 2
+ %inc_3 = add nuw nsw i64 %i.0_3, 1
+ %scevgep = getelementptr i8, ptr %lsr.iv, i64 2
+ %scevgep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+ br label %for.cond_3
+
+exit2: ; preds = %for.cond_3
+ ret void
+}
+
+declare dso_local spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef, i32 noundef)
+declare dso_local spir_func noundef half @_Z17__spirv_ocl_frexpDF16_PU3AS0i(half noundef, ptr noundef)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll b/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll
new file mode 100644
index 00000000000000..0e2730e18bf382
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll
@@ -0,0 +1,42 @@
+; The goal of the test is to ensure that type inference doesn't break validity of the generated SPIR-V code.
+; The only pass criterion is that spirv-val considers output valid.
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#Char:]] = OpTypeInt 8 0
+; CHECK-DAG: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]]
+; CHECK-DAG: %[[#PtrCharCW:]] = OpTypePointer CrossWorkgroup %[[#Char]]
+; CHECK-DAG: %[[#PtrCharGen:]] = OpTypePointer Generic %[[#Char]]
+; CHECK-DAG: %[[#Struct:]] = OpTypeStruct %[[#]] %[[#]] %[[#]]
+; CHECK-DAG: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]]
+; CHECK-DAG: %[[#PtrPtrCharGen:]] = OpTypePointer Function %[[#PtrCharGen]]
+; CHECK-DAG: %[[#PtrStruct:]] = OpTypePointer Function %[[#Struct]]
+; CHECK: OpFunction
+; CHECK: %[[#Arg1:]] = OpFunctionParameter %[[#Int]]
+; CHECK: %[[#Arg2:]] = OpFunctionParameter %[[#PtrCharCW]]
+; CHECK: %[[#Kernel:]] = OpVariable %[[#PtrStruct]] Function
+; CHECK: %[[#IntKernel:]] = OpBitcast %[[#PtrInt]] %[[#Kernel]]
+; CHECK: OpStore %[[#IntKernel]] %[[#Arg1]]
+; CHECK: %[[#CharKernel:]] = OpBitcast %[[#PtrChar]] %[[#Kernel]]
+; CHECK: %[[#P:]] = OpInBoundsPtrAccessChain %[[#PtrChar]] %[[#CharKernel]] %[[#]]
+; CHECK: %[[#R0:]] = OpPtrCastToGeneric %[[#PtrCharGen]] %[[#Arg2]]
+; CHECK: %[[#P2:]] = OpBitcast %[[#PtrPtrCharGen]] %[[#P]]
+; CHECK: OpStore %[[#P2]] %[[#R0]]
+; CHECK: %[[#P3:]] = OpBitcast %[[#PtrPtrCharGen]] %[[#P]]
+; CHECK: %[[#]] = OpLoad %[[#PtrCharGen]] %[[#P3]]
+
+%"class.std::complex" = type { { double, double } }
+%class.anon = type { i32, ptr addrspace(4), [2 x [2 x %"class.std::complex"]] }
+
+define weak_odr dso_local spir_kernel void @foo(i32 noundef %_arg_N, ptr addrspace(1) noundef align 8 %_arg_p) {
+entry:
+ %Kernel = alloca %class.anon, align 8
+ store i32 %_arg_N, ptr %Kernel, align 8
+ %p = getelementptr inbounds i8, ptr %Kernel, i64 8
+ %r0 = addrspacecast ptr addrspace(1) %_arg_p to ptr addrspace(4)
+ store ptr addrspace(4) %r0, ptr %p, align 8
+ %r3 = load ptr addrspace(4), ptr %p, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll b/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll
new file mode 100644
index 00000000000000..d94da31890ab17
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll
@@ -0,0 +1,36 @@
+; The goal of the test is to ensure that type inference doesn't break validity of the generated SPIR-V code.
+; The only pass criterion is that spirv-val considers output valid.
+
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpFunction
+
+%class.anon = type { i32, ptr addrspace(4)}
+
+define weak_odr dso_local spir_kernel void @foo(i32 noundef %_arg_N, i1 %fl) {
+entry:
+ %__SYCLKernel = alloca %class.anon, align 8
+ store i32 %_arg_N, ptr %__SYCLKernel, align 8
+ br label %arinit
+
+arinit:
+ %scevgep3 = getelementptr nuw i8, ptr %__SYCLKernel, i64 24
+ br label %for.cond.i
+
+for.cond.i:
+ %lsr.iv4 = phi ptr [ %scevgep5, %for.body.i ], [ %scevgep3, %arinit ]
+ br i1 %fl, label %for.body.i, label %exit
+
+for.body.i:
+ %scevgep6 = getelementptr i8, ptr %lsr.iv4, i64 -8
+ %_M_value.imag.i.i = load double, ptr %lsr.iv4, align 8
+ %scevgep5 = getelementptr i8, ptr %lsr.iv4, i64 32
+ br label %for.cond.i
+
+exit:
+ ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll
new file mode 100644
index 00000000000000..a9e79df259c4fb
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll
@@ -0,0 +1,82 @@
+; The goal of the test case is to ensure that correct types are applied to PHIs used as arguments of other PHIs.
+; Pass criterion is that spirv-val considers output valid.
+
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+
+; CHECK-DAG: OpName %[[#Foo:]] "foo"
+; CHECK-DAG: OpName %[[#FooVal1:]] "val1"
+; CHECK-DAG: OpName %[[#FooVal2:]] "val2"
+; CHECK-DAG: OpName %[[#FooVal3:]] "val3"
+; CHECK-DAG: OpName %[[#Bar:]] "bar"
+; CHECK-DAG: OpName %[[#BarVal1:]] "val1"
+; CHECK-DAG: OpName %[[#BarVal2:]] "val2"
+; CHECK-DAG: OpName %[[#BarVal3:]] "val3"
+
+; CHECK-DAG: %[[#Short:]] = OpTypeInt 16 0
+; CHECK-DAG: %[[#ShortGenPtr:]] = OpTypePointer Generic %[[#Short]]
+; CHECK-DAG: %[[#ShortWrkPtr:]] = OpTypePointer Workgroup %[[#Short]]
+; CHECK-DAG: %[[#G1:]] = OpVariable %[[#ShortWrkPtr]] Workgroup
+
+; CHECK: %[[#Foo:]] = OpFunction %[[#]] None %[[#]]
+; CHECK: %[[#FooArgP:]] = OpFunctionParameter %[[#ShortGenPtr]]
+; CHECK: OpFunctionParameter
+; CHECK: OpFunctionParameter
+; CHECK: OpFunctionParameter
+; CHECK: %[[#FooG1:]] = OpPtrCastToGeneric %[[#ShortGenPtr]] %[[#G1]]
+; CHECK: %[[#FooVal2]] = OpPhi %[[#ShortGenPtr]] %[[#FooArgP]] %[[#]] %[[#FooVal3]] %[[#]]
+; CHECK: %[[#FooVal1]] = OpPhi %[[#ShortGenPtr]] %[[#FooG1]] %[[#]] %[[#FooVal2]] %[[#]]
+; CHECK: %[[#FooVal3]] = OpLoad %[[#ShortGenPtr]] %[[#]]
+
+; CHECK: %[[#Bar:]] = OpFunction %[[#]] None %[[#]]
+; CHECK: %[[#BarArgP:]] = OpFunctionParameter %[[#ShortGenPtr]]
+; CHECK: OpFunctionParameter
+; CHECK: OpFunctionParameter
+; CHECK: OpFunctionParameter
+; CHECK: %[[#BarVal3]] = OpLoad %[[#ShortGenPtr]] %[[#]]
+; CHECK: %[[#BarG1:]] = OpPtrCastToGeneric %[[#ShortGenPtr]] %[[#G1]]
+; CHECK: %[[#BarVal1]] = OpPhi %[[#ShortGenPtr]] %[[#BarG1]] %[[#]] %[[#BarVal2]] %[[#]]
+; CHECK: %[[#BarVal2]] = OpPhi %[[#ShortGenPtr]] %[[#BarArgP]] %[[#]] %[[#BarVal3]] %[[#]]
+
+ at G1 = internal addrspace(3) global i16 undef, align 8
+ at G2 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+
+define spir_kernel void @foo(ptr addrspace(4) %p, i1 %f1, i1 %f2, i1 %f3) {
+entry:
+ br label %l1
+
+l1:
+ br i1 %f1, label %l2, label %exit
+
+l2:
+ %val2 = phi ptr addrspace(4) [ %p, %l1 ], [ %val3, %l3 ]
+ %val1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @G1 to ptr addrspace(4)), %l1 ], [ %val2, %l3 ]
+ br i1 %f2, label %l3, label %exit
+
+l3:
+ %val3 = load ptr addrspace(4), ptr addrspace(3) @G2, align 8
+ br i1 %f3, label %l2, label %exit
+
+exit:
+ ret void
+}
+
+define spir_kernel void @bar(ptr addrspace(4) %p, i1 %f1, i1 %f2, i1 %f3) {
+entry:
+ %val3 = load ptr addrspace(4), ptr addrspace(3) @G2, align 8
+ br label %l1
+
+l3:
+ br i1 %f3, label %l2, label %exit
+
+l1:
+ br i1 %f1, label %l2, label %exit
+
+l2:
+ %val1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @G1 to ptr addrspace(4)), %l1 ], [ %val2, %l3 ]
+ %val2 = phi ptr addrspace(4) [ %p, %l1 ], [ %val3, %l3 ]
+ br i1 %f2, label %l3, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
index 07824d4ed6cd85..f4c8c5a79bcb77 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
@@ -1,15 +1,14 @@
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-; CHECK: %[[#Char:]] = OpTypeInt 8 0
-; CHECK: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]]
-; CHECK: %[[#Int:]] = OpTypeInt 32 0
-; CHECK: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]]
+; CHECK-DAG: %[[#Char:]] = OpTypeInt 8 0
+; CHECK-DAG: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]]
+; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]]
; CHECK: %[[#R1:]] = OpFunctionCall %[[#PtrChar]] %[[#]]
; CHECK: %[[#R2:]] = OpFunctionCall %[[#PtrInt]] %[[#]]
; CHECK: %[[#Casted:]] = OpBitcast %[[#PtrChar]] %[[#R2]]
; CHECK: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#Casted]] %[[#]]
-; CHECK: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#Casted]] %[[#]]
define ptr @foo(i1 %arg) {
entry:
diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll
new file mode 100644
index 00000000000000..b0047ba82c36d1
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll
@@ -0,0 +1,64 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - --translator-compatibility-mode | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: OpName %[[#Bar:]] "bar"
+; CHECK-DAG: OpName %[[#Foo:]] "foo"
+; CHECK-DAG: OpName %[[#Test:]] "test"
+; CHECK-DAG: %[[#Void:]] = OpTypeVoid
+; CHECK-DAG: %[[#Long:]] = OpTypeInt 64 0
+; CHECK-DAG: %[[#LongArr:]] = OpTypeArray %[[#Long]] %[[#]]
+; CHECK-DAG: %[[#StructLongArr:]] = OpTypeStruct %[[#LongArr]]
+; CHECK-DAG: %[[#Struct:]] = OpTypeStruct %[[#StructLongArr]]
+; CHECK-DAG: %[[#StructGenPtr:]] = OpTypePointer Generic %[[#Struct]]
+; CHECK-DAG: %[[#StructFunPtr:]] = OpTypePointer Function %[[#Struct]]
+; CHECK-DAG: %[[#StructGenGenPtr:]] = OpTypePointer Generic %[[#StructGenPtr]]
+; CHECK-DAG: %[[#StructFunGenPtr:]] = OpTypePointer Function %[[#StructGenPtr]]
+
+; CHECK: %[[#Bar]] = OpFunction
+; CHECK: %[[#BarVar:]] = OpVariable %[[#StructFunPtr]] Function
+; CHECK: %[[#BarVarToGen:]] = OpPtrCastToGeneric %[[#StructGenPtr]] %[[#BarVar]]
+; CHECK: %[[#]] = OpFunctionCall %[[#Void]] %[[#Foo]] %[[#BarVarToGen]]
+
+; CHECK: %[[#Foo]] = OpFunction
+; CHECK: %[[#FooArg1:]] = OpFunctionParameter %[[#StructGenPtr]]
+; CHECK: %[[#FooVar:]] = OpVariable %[[#StructFunGenPtr]] Function
+; CHECK: %[[#FooVarToGen:]] = OpPtrCastToGeneric %[[#StructGenGenPtr]] %[[#FooVar]]
+; CHECK: OpStore %[[#FooVarToGen]] %[[#FooArg1]]
+; CHECK: %[[#FooLoad:]] = OpLoad %[[#StructGenPtr]] %[[#FooVarToGen]]
+; CHECK: %[[#]] = OpFunctionCall %[[#Void:]] %[[#Test]] %[[#FooLoad:]]
+
+; CHECK: %[[#Test]] = OpFunction
+; CHECK: %[[#TestArg1:]] = OpFunctionParameter %[[#StructGenPtr]]
+; CHECK: %[[#TestVar:]] = OpVariable %[[#StructFunGenPtr]] Function
+; CHECK: %[[#TestVarToGen:]] = OpPtrCastToGeneric %[[#StructGenGenPtr]] %[[#TestVar]]
+; CHECK: OpStore %[[#TestVarToGen]] %[[#TestArg1]]
+
+%t_range = type { %t_arr }
+%t_arr = type { [1 x i64] }
+
+define internal spir_func void @bar() {
+ %GlobalOffset = alloca %t_range, align 8
+ %GlobalOffset.ascast = addrspacecast ptr %GlobalOffset to ptr addrspace(4)
+ call spir_func void @foo(ptr addrspace(4) noundef align 8 dereferenceable(8) %GlobalOffset.ascast)
+ ret void
+}
+
+define internal spir_func void @foo(ptr addrspace(4) noundef align 8 dereferenceable(8) %Offset) {
+entry:
+ %Offset.addr = alloca ptr addrspace(4), align 8
+ %Offset.addr.ascast = addrspacecast ptr %Offset.addr to ptr addrspace(4)
+ store ptr addrspace(4) %Offset, ptr addrspace(4) %Offset.addr.ascast, align 8
+ %r2 = load ptr addrspace(4), ptr addrspace(4) %Offset.addr.ascast, align 8
+ call spir_func void @test(ptr addrspace(4) noundef align 8 dereferenceable(8) %r2)
+ ret void
+}
+
+define void @test(ptr addrspace(4) noundef align 8 dereferenceable(8) %offset) {
+ %offset.addr = alloca ptr addrspace(4), align 8
+ %offset.addr.ascast = addrspacecast ptr %offset.addr to ptr addrspace(4)
+ store ptr addrspace(4) %offset, ptr addrspace(4) %offset.addr.ascast, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll
index 54b2c786747768..2cba0f6ebd74be 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll
@@ -2,9 +2,7 @@
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; CHECK-SPIRV-DAG: %[[#Char:]] = OpTypeInt 8 0
-; CHECK-SPIRV-DAG: %[[#GlobalCharPtr:]] = OpTypePointer CrossWorkgroup %[[#Char]]
; CHECK-SPIRV-DAG: %[[#LocalCharPtr:]] = OpTypePointer Workgroup %[[#Char]]
-; CHECK-SPIRV-DAG: %[[#PrivateCharPtr:]] = OpTypePointer Function %[[#Char]]
; CHECK-SPIRV-DAG: %[[#GenericCharPtr:]] = OpTypePointer Generic %[[#Char]]
; CHECK-SPIRV-DAG: %[[#Int:]] = OpTypeInt 32 0
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
index fcb61911e0d292..e512f909cfd059 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
@@ -64,6 +64,19 @@ declare dso_local spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU
; CHECK: OpGroupWaitEvents %[[#]] %[[#]] %[[#EventVarBarGen]]
; CHECK: OpFunctionEnd
+; CHECK2: OpFunction
+; CHECK2: %[[#BarArg1:]] = OpFunctionParameter %[[#TyPtrSV4_W]]
+; CHECK2: %[[#BarArg2:]] = OpFunctionParameter %[[#TyPtrSV4_CW]]
+; CHECK2: %[[#EventVarBar:]] = OpVariable %[[#TyEventPtr]] Function
+; CHECK2: %[[#SrcBar:]] = OpInBoundsPtrAccessChain %[[#TyPtrSV4_CW]] %[[#BarArg2]] %[[#]]
+; CHECK2-DAG: %[[#BarArg1Casted:]] = OpBitcast %[[#TyPtrV4_W]] %[[#BarArg1]]
+; CHECK2-DAG: %[[#SrcBarCasted:]] = OpBitcast %[[#TyPtrV4_CW]] %[[#SrcBar]]
+; CHECK2: %[[#ResBar:]] = OpGroupAsyncCopy %[[#TyEvent]] %[[#]] %[[#BarArg1Casted]] %[[#SrcBarCasted]] %[[#]] %[[#]] %[[#ConstEvent]]
+; CHECK2: OpStore %[[#EventVarBar]] %[[#ResBar]]
+; CHECK2: %[[#EventVarBarGen:]] = OpPtrCastToGeneric %[[#TyEventPtrGen]] %[[#EventVarBar]]
+; CHECK2: OpGroupWaitEvents %[[#]] %[[#]] %[[#EventVarBarGen]]
+; CHECK2: OpFunctionEnd
+
%Vec4 = type { <4 x i8> }
define spir_kernel void @bar(ptr addrspace(3) %_arg_Local, ptr addrspace(1) readonly %_arg) {
diff --git a/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll b/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll
new file mode 100644
index 00000000000000..77ed1d6fecf9ae
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll
@@ -0,0 +1,974 @@
+; This is an excerpt from the SYCL end-to-end test suite, stripped of irrelevant details,
+; which reproduces multiple cases of OpPhi's result type mismatching its operand types.
+; The only pass criterion is that spirv-val considers output valid.
+
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+%struct.PFWGFunctor = type { i64, i64, i32, i32, %"class.sycl::_V1::accessor" } ; functor captured state; field meanings inferred from the stores in @_ZTS11PFWGFunctor -- confirm
+%"class.sycl::_V1::accessor" = type { %"class.sycl::_V1::detail::AccessorImplDevice", %union.anon }
+%"class.sycl::_V1::detail::AccessorImplDevice" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range" }
+%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" }
+%"class.sycl::_V1::detail::array" = type { [1 x i64] } ; 1-D range payload
+%union.anon = type { ptr addrspace(1) }
+%class.anon.2 = type { %"class.sycl::_V1::accessor" }
+%"class.sycl::_V1::group" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range" }
+%"class.sycl::_V1::group.15" = type { %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16" }
+%"class.sycl::_V1::range.16" = type { %"class.sycl::_V1::detail::array.17" }
+%"class.sycl::_V1::detail::array.17" = type { [2 x i64] } ; 2-D range payload
+%"class.sycl::_V1::private_memory" = type { %struct.MyStruct }
+%struct.MyStruct = type { i32, i32 }
+
+@GFunctor = internal addrspace(3) global %struct.PFWGFunctor undef, align 8 ; group-shared functor state, filled from kernel args
+@WI.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 ; WI.* slots: leader-written per-work-item broadcast values
+@WI.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WI.2 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WI.3 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WI.4 = internal unnamed_addr addrspace(3) global i32 undef, align 8
+@WI.6 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCnt = internal unnamed_addr addrspace(3) global i32 undef, align 4 ; leader-maintained loop counter
+@__spirv_BuiltInNumWorkgroups = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 ; SPIR-V builtin variables
+@GKernel1 = internal addrspace(3) global %class.anon.2 undef, align 8
+@GCnt2 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@GKernel2 = internal addrspace(3) global %class.anon.2 undef, align 8
+@GCnt3 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@GKernel3 = internal addrspace(3) global %class.anon.2 undef, align 8
+@GCnt4 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@GKernel4 = internal addrspace(3) global %class.anon.2 undef, align 8
+@GCnt5 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@__spirv_BuiltInLocalInvocationIndex = external local_unnamed_addr addrspace(1) constant i64, align 8
+@GThis = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GAsCast = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp = internal unnamed_addr addrspace(3) global i1 undef, align 1 ; leader-computed loop condition, read by the whole group
+@WGCopy = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 ; WGCopy.* slots: leader->group broadcast staging
+@WGCopy.1.0 = internal unnamed_addr addrspace(3) global i64 undef, align 16
+@WGCopy.1.1 = internal unnamed_addr addrspace(3) global i64 undef, align 16
+@WGCopy.1.2 = internal unnamed_addr addrspace(3) global i64 undef, align 16
+@WGCopy.1.3 = internal unnamed_addr addrspace(3) global i64 undef, align 16
+@WGCopy.1.4 = internal unnamed_addr addrspace(3) global i32 undef, align 16
+@WGCopy.1.5 = internal unnamed_addr addrspace(3) global i32 undef, align 16
+@WGCopy.1.6 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 16
+@ArgShadow = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16 ; staging buffer for the leader's group object
+@GAsCast2 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp2 = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy.3.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.4.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.5.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.6.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@ArgShadow.7 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16
+@GAscast3 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp3 = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy.9.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.10.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@ArgShadow.11 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16
+@GAsCast4 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp4 = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy.13.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.13.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.14.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.14.1 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.15.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.15.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.16.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.16.1 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@ArgShadow.17 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group.15" undef, align 16
+@GAsCast5 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp5 = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy.19.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.20.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.20.1 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@ArgShadow.21 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16
+@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@__spirv_BuiltInGlobalSize = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@__spirv_BuiltInWorkgroupId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@__spirv_BuiltInWorkgroupSize = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @_ZTS11PFWGFunctor(i64 noundef %_arg_wg_chunk, i64 noundef %_arg_range_length, i32 noundef %_arg_n_iter, i32 noundef %_arg_addend, ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { ; kernel entry: publishes functor args/accessor info to addrspace(3) globals, builds a group object in %agg.tmp67, then runs the per-work-group loop
+entry:
+ %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8
+ store i64 %_arg_wg_chunk, ptr addrspace(3) @GFunctor, align 8
+ store i64 %_arg_range_length, ptr addrspace(3) undef, align 8
+ store i32 %_arg_n_iter, ptr addrspace(3) undef, align 8
+ store i32 %_arg_addend, ptr addrspace(3) undef, align 4
+ %0 = load i64, ptr %_arg_dev_ptr1, align 8
+ %1 = load i64, ptr %_arg_dev_ptr2, align 8
+ %2 = load i64, ptr %_arg_dev_ptr3, align 8
+ store i64 %2, ptr addrspace(3) undef, align 8
+ store i64 %0, ptr addrspace(3) undef, align 8
+ store i64 %1, ptr addrspace(3) undef, align 8
+ %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+ store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+ %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+ %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+ %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+ %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67)
+ store i64 %3, ptr %agg.tmp67, align 1
+ %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+ store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+ store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+ store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+ %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %cmpz15.i = icmp eq i64 %7, 0 ; leader test: local invocation index == 0
+ br i1 %cmpz15.i, label %leader.i, label %merge.i
+
+leader.i: ; preds = %entry ; leader broadcasts the group object through @ArgShadow
+ call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false)
+ br label %merge.i
+
+merge.i: ; preds = %leader.i, %entry
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow, i64 32, i1 false)
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i: ; preds = %merge.i ; leader initializes the WI.* broadcast slots and the loop counter
+ %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+ store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast, align 8
+ store ptr addrspace(4) addrspacecast (ptr addrspace(3) @GFunctor to ptr addrspace(4)), ptr addrspace(3) @GThis, align 8
+ %8 = load i32, ptr addrspace(3) undef, align 4
+ %9 = load i64, ptr addrspace(3) @GFunctor, align 8
+ %index.i = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+ %10 = load i64, ptr %index.i, align 8
+ %mul.i = mul i64 %9, %10
+ %localRange.i = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+ %11 = load i64, ptr %localRange.i, align 8
+ %12 = load i64, ptr addrspace(3) undef, align 8
+ store i64 %9, ptr addrspace(3) @WI.0, align 8
+ store i64 %11, ptr addrspace(3) @WI.1, align 8
+ store i64 %mul.i, ptr addrspace(3) @WI.2, align 8
+ store i64 %12, ptr addrspace(3) @WI.3, align 8
+ store i32 %8, ptr addrspace(3) @WI.4, align 8
+ store ptr addrspace(4) undef, ptr addrspace(3) @WI.6, align 8
+ store i32 0, ptr addrspace(3) @GCnt, align 4
+ br label %wg_cf.i
+
+wg_cf.i: ; preds = %wg_leader.i, %merge.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %wg_val_this1.i = load ptr addrspace(4), ptr addrspace(3) @GThis, align 8
+ %n_iter.i = getelementptr inbounds i8, ptr addrspace(4) %wg_val_this1.i, i64 16
+ %13 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32
+ br label %for.cond.i
+
+for.cond.i: ; preds = %wg_cf11.i, %wg_cf.i ; outer loop header: phis carry the WI.*/WGCopy.* broadcast values across iterations
+ %agg.tmp.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.0.0.copyload13, %wg_cf11.i ]
+ %agg.tmp.i.sroa.6.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.6.0.copyload15, %wg_cf11.i ]
+ %agg.tmp.i.sroa.7.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.7.0.copyload17, %wg_cf11.i ]
+ %agg.tmp.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.8.0.copyload19, %wg_cf11.i ]
+ %agg.tmp.i.sroa.9.0 = phi i32 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.9.0.copyload21, %wg_cf11.i ]
+ %agg.tmp.i.sroa.10.0 = phi i32 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.10.0.copyload23, %wg_cf11.i ]
+ %agg.tmp.i.sroa.11.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.11.0.copyload25, %wg_cf11.i ] ; pointer-typed phi: one of the cases this test pins down
+ %this.addr.0.i = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GFunctor to ptr addrspace(4)), %wg_cf.i ], [ %mat_ld13.i, %wg_cf11.i ]
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %wg_leader4.i, label %wg_cf5.i
+
+wg_leader4.i: ; preds = %for.cond.i ; leader computes the loop condition (GCnt < n_iter) and shares it via @GCmp
+ %14 = load i32, ptr addrspace(3) @GCnt, align 4
+ %15 = load i32, ptr addrspace(4) %n_iter.i, align 8
+ %cmp.i = icmp slt i32 %14, %15
+ store i1 %cmp.i, ptr addrspace(3) @GCmp, align 1
+ br label %wg_cf5.i
+
+wg_cf5.i: ; preds = %wg_leader4.i, %for.cond.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp, align 1
+ br i1 %wg_val_cmp.i, label %for.body.i, label %_ZNK11PFWGFunctorclEN4sycl3_V15groupILi1EEE.exit
+
+for.body.i: ; preds = %wg_cf5.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %wg_leader7.i, label %wg_cf8.i
+
+wg_leader7.i: ; preds = %for.body.i ; leader reloads the WI.* values for this iteration
+ %agg.tmp.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WI.0, align 8
+ %agg.tmp.i.sroa.6.0.copyload = load i64, ptr addrspace(3) @WI.1, align 8
+ %agg.tmp.i.sroa.7.0.copyload = load i64, ptr addrspace(3) @WI.2, align 8
+ %agg.tmp.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WI.3, align 8
+ %agg.tmp.i.sroa.9.0.copyload = load i32, ptr addrspace(3) @WI.4, align 8
+ %agg.tmp.i.sroa.11.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WI.6, align 8
+ br label %wg_cf8.i
+
+wg_cf8.i: ; preds = %wg_leader7.i, %for.body.i ; merge leader-loaded values with the loop-carried ones
+ %agg.tmp.i.sroa.0.1 = phi i64 [ %agg.tmp.i.sroa.0.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.0.0, %for.body.i ]
+ %agg.tmp.i.sroa.6.1 = phi i64 [ %agg.tmp.i.sroa.6.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.6.0, %for.body.i ]
+ %agg.tmp.i.sroa.7.1 = phi i64 [ %agg.tmp.i.sroa.7.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.7.0, %for.body.i ]
+ %agg.tmp.i.sroa.8.1 = phi i64 [ %agg.tmp.i.sroa.8.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.8.0, %for.body.i ]
+ %agg.tmp.i.sroa.9.1 = phi i32 [ %agg.tmp.i.sroa.9.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.9.0, %for.body.i ]
+ %agg.tmp.i.sroa.11.1 = phi ptr addrspace(4) [ %agg.tmp.i.sroa.11.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.11.0, %for.body.i ]
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i: ; preds = %wg_cf8.i ; leader publishes the current state to the @WGCopy.* slots
+ store i64 %agg.tmp.i.sroa.0.1, ptr addrspace(3) @WGCopy.1.0, align 16
+ store i64 %agg.tmp.i.sroa.6.1, ptr addrspace(3) @WGCopy.1.1, align 16
+ store i64 %agg.tmp.i.sroa.7.1, ptr addrspace(3) @WGCopy.1.2, align 16
+ store i64 %agg.tmp.i.sroa.8.1, ptr addrspace(3) @WGCopy.1.3, align 16
+ store i32 %agg.tmp.i.sroa.9.1, ptr addrspace(3) @WGCopy.1.4, align 16
+ store i32 %agg.tmp.i.sroa.10.0, ptr addrspace(3) @WGCopy.1.5, align 16
+ store ptr addrspace(4) %agg.tmp.i.sroa.11.1, ptr addrspace(3) @WGCopy.1.6, align 16
+ store ptr addrspace(4) %this.addr.0.i, ptr addrspace(3) @WGCopy, align 8
+ br label %LeaderMat.i
+
+LeaderMat.i: ; preds = %TestMat.i, %wg_cf8.i ; all threads reload the published state after a barrier
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %mat_ld13.i = load ptr addrspace(4), ptr addrspace(3) @WGCopy, align 8
+ %agg.tmp.i.sroa.0.0.copyload13 = load i64, ptr addrspace(3) @WGCopy.1.0, align 16
+ %agg.tmp.i.sroa.6.0.copyload15 = load i64, ptr addrspace(3) @WGCopy.1.1, align 16
+ %agg.tmp.i.sroa.7.0.copyload17 = load i64, ptr addrspace(3) @WGCopy.1.2, align 16
+ %agg.tmp.i.sroa.8.0.copyload19 = load i64, ptr addrspace(3) @WGCopy.1.3, align 16
+ %agg.tmp.i.sroa.9.0.copyload21 = load i32, ptr addrspace(3) @WGCopy.1.4, align 16
+ %agg.tmp.i.sroa.10.0.copyload23 = load i32, ptr addrspace(3) @WGCopy.1.5, align 16
+ %agg.tmp.i.sroa.11.0.copyload25 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.1.6, align 16
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ %cmp.not.i.i = icmp ult i64 %13, %agg.tmp.i.sroa.0.0.copyload13
+ br i1 %cmp.not.i.i, label %if.end.i.i, label %lexit1
+
+if.end.i.i: ; preds = %LeaderMat.i ; compute this work-item's chunk bounds [add4, min(copyload19, add6))
+ %add.i.i = add i64 %agg.tmp.i.sroa.0.0.copyload13, %agg.tmp.i.sroa.6.0.copyload15
+ %sub.i.i = add i64 %add.i.i, -1
+ %div.i.i = udiv i64 %sub.i.i, %agg.tmp.i.sroa.6.0.copyload15
+ %mul.i.i = mul i64 %13, %div.i.i
+ %add4.i.i = add i64 %agg.tmp.i.sroa.7.0.copyload17, %mul.i.i
+ %add6.i.i = add i64 %add4.i.i, %div.i.i
+ %.sroa.speculated.i.i = call i64 @llvm.umin.i64(i64 %agg.tmp.i.sroa.8.0.copyload19, i64 %add6.i.i)
+ %16 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp.i.sroa.11.0.copyload25, i64 24
+ br label %for.cond.i.i
+
+for.cond.i.i: ; preds = %for.body.i.i, %if.end.i.i
+ %ind.0.i.i = phi i64 [ %add4.i.i, %if.end.i.i ], [ %inc.i.i, %for.body.i.i ]
+ %cmp8.i.i = icmp ult i64 %ind.0.i.i, %.sroa.speculated.i.i
+ br i1 %cmp8.i.i, label %for.body.i.i, label %lexit1
+
+for.body.i.i: ; preds = %for.cond.i.i ; adds the broadcast addend to each i32 element of this chunk
+ %17 = load ptr addrspace(1), ptr addrspace(4) %16, align 8
+ %arrayidx.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %17, i64 %ind.0.i.i
+ %18 = load i32, ptr addrspace(1) %arrayidx.i.i.i, align 4
+ %add10.i.i = add nsw i32 %18, %agg.tmp.i.sroa.9.0.copyload21
+ store i32 %add10.i.i, ptr addrspace(1) %arrayidx.i.i.i, align 4
+ %inc.i.i = add nuw i64 %ind.0.i.i, 1
+ br label %for.cond.i.i
+
+lexit1: ; preds = %for.cond.i.i, %LeaderMat.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %wg_leader10.i, label %wg_cf11.i
+
+wg_leader10.i: ; preds = %lexit1 ; leader advances the loop counter
+ %19 = load i32, ptr addrspace(3) @GCnt, align 4
+ %inc.i = add nsw i32 %19, 1
+ store i32 %inc.i, ptr addrspace(3) @GCnt, align 4
+ br label %wg_cf11.i
+
+wg_cf11.i: ; preds = %wg_leader10.i, %lexit1
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br label %for.cond.i
+
+_ZNK11PFWGFunctorclEN4sycl3_V15groupILi1EEE.exit: ; preds = %wg_cf5.i
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67)
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+; Function Attrs: convergent nounwind
+declare dso_local spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef, i32 noundef, i32 noundef) ; OpControlBarrier(exec scope, mem scope, semantics); called as (2, 2, 272) throughout -- presumably Workgroup scope, verify against the SPIR-V spec
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) ; private -> local copy, used for the leader's @ArgShadow broadcast
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p3.i64(ptr noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) ; local -> private copy back from @ArgShadow
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.umin.i64(i64, i64) ; unsigned min, used to clamp the per-item chunk upper bound
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @bar(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { ; second kernel: same leader/broadcast pattern, with two nested loop regions and pointers broadcast as integers
+entry:
+ %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8
+ %0 = load i64, ptr %_arg_dev_ptr1, align 8
+ %1 = load i64, ptr %_arg_dev_ptr2, align 8
+ %2 = load i64, ptr %_arg_dev_ptr3, align 8
+ store i64 %2, ptr addrspace(3) @GKernel1, align 8
+ store i64 %0, ptr addrspace(3) undef, align 8
+ store i64 %1, ptr addrspace(3) undef, align 8
+ %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+ store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+ %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+ %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+ %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+ %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67)
+ store i64 %3, ptr %agg.tmp67, align 1
+ %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+ store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+ store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+ store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+ %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %cmpz27.i = icmp eq i64 %7, 0 ; leader test: local invocation index == 0
+ br i1 %cmpz27.i, label %leader.i, label %merge.i
+
+leader.i: ; preds = %entry ; leader broadcasts the group object through @ArgShadow.7
+ call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.7, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false)
+ br label %merge.i
+
+merge.i: ; preds = %leader.i, %entry
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.7, i64 32, i1 false)
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz27.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i: ; preds = %merge.i
+ %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+ store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast2, align 8
+ store i32 0, ptr addrspace(3) @GCnt2, align 4
+ br label %wg_cf.i
+
+wg_cf.i: ; preds = %wg_leader.i, %merge.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %8 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32
+ %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32
+ %cmp.i.i.i.i.i.i = icmp ult i64 %8, 2147483648
+ br label %for.cond.i
+
+for.cond.i: ; preds = %wg_cf18.i, %wg_cf.i ; outer loop header: phis carry values reloaded from @WGCopy.5.0/@WGCopy.6.0
+ %agg.tmp5.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %18, %wg_cf18.i ]
+ %agg.tmp4.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %17, %wg_cf18.i ]
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz27.i, label %wg_leader8.i, label %wg_cf9.i
+
+wg_leader8.i: ; preds = %for.cond.i ; leader computes the loop condition (GCnt2 < 2) and shares it via @GCmp2
+ %10 = load i32, ptr addrspace(3) @GCnt2, align 4
+ %cmp.i = icmp slt i32 %10, 2
+ store i1 %cmp.i, ptr addrspace(3) @GCmp2, align 1
+ br label %wg_cf9.i
+
+wg_cf9.i: ; preds = %wg_leader8.i, %for.cond.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp2, align 1
+ br i1 %wg_val_cmp.i, label %for.body.i, label %lexit2
+
+for.body.i: ; preds = %wg_cf9.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz27.i, label %TestMat25.i, label %LeaderMat22.i
+
+TestMat25.i: ; preds = %for.body.i ; leader publishes loop state; the pointer is broadcast as an integer (ptrtoint)
+ store i64 %agg.tmp5.i.sroa.0.0, ptr addrspace(3) @WGCopy.6.0, align 8
+ store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel1 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.4.0, align 8
+ store i64 5, ptr addrspace(3) @WGCopy.3.0, align 8
+ store i64 %agg.tmp4.i.sroa.0.0, ptr addrspace(3) @WGCopy.5.0, align 8
+ br label %LeaderMat22.i
+
+LeaderMat22.i: ; preds = %TestMat25.i, %for.body.i ; all threads reload the published state; pointer is restored via inttoptr
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %11 = load i64, ptr addrspace(3) @WGCopy.3.0, align 8
+ %12 = load i64, ptr addrspace(3) @WGCopy.4.0, align 8
+ %13 = inttoptr i64 %12 to ptr addrspace(4)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ %14 = getelementptr inbounds i8, ptr addrspace(4) %13, i64 24
+ br label %for.cond.i.i
+
+for.cond.i.i: ; preds = %for.body.i.i, %LeaderMat22.i ; first inner parallel-for region
+ %storemerge.i.i = phi i64 [ %9, %LeaderMat22.i ], [ %add.i.i, %for.body.i.i ]
+ %cmp.i.i = icmp ult i64 %storemerge.i.i, %11
+ br i1 %cmp.i.i, label %for.body.i.i, label %lexit3
+
+for.body.i.i: ; preds = %for.cond.i.i ; increments each visited i32 element by 1
+ call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+ %15 = load ptr addrspace(1), ptr addrspace(4) %14, align 8
+ %arrayidx.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %15, i64 %8
+ %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+ %inc.i.i.i.i = add nsw i32 %16, 1
+ store i32 %inc.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+ %add.i.i = add i64 %storemerge.i.i, %4
+ br label %for.cond.i.i
+
+lexit3: ; preds = %for.cond.i.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz27.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i: ; preds = %lexit3 ; leader publishes state for the second region
+ store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel1 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.6.0, align 8
+ store i64 %12, ptr addrspace(3) @WGCopy.4.0, align 8
+ store i64 %11, ptr addrspace(3) @WGCopy.3.0, align 8
+ store i64 2, ptr addrspace(3) @WGCopy.5.0, align 8
+ br label %LeaderMat.i
+
+LeaderMat.i: ; preds = %TestMat.i, %lexit3
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %17 = load i64, ptr addrspace(3) @WGCopy.5.0, align 8
+ %18 = load i64, ptr addrspace(3) @WGCopy.6.0, align 8
+ %19 = inttoptr i64 %18 to ptr addrspace(4)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ %20 = getelementptr inbounds i8, ptr addrspace(4) %19, i64 24
+ br label %for.cond.i.i19
+
+for.cond.i.i19: ; preds = %for.body.i.i22, %LeaderMat.i ; second inner parallel-for region
+ %storemerge.i.i20 = phi i64 [ %9, %LeaderMat.i ], [ %add.i.i26, %for.body.i.i22 ]
+ %cmp.i.i21 = icmp ult i64 %storemerge.i.i20, %17
+ br i1 %cmp.i.i21, label %for.body.i.i22, label %lexit4
+
+for.body.i.i22: ; preds = %for.cond.i.i19
+ call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+ %21 = load ptr addrspace(1), ptr addrspace(4) %20, align 8
+ %arrayidx.i.i.i.i.i23 = getelementptr inbounds i32, ptr addrspace(1) %21, i64 %8
+ %22 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i23, align 4
+ %inc.i.i.i.i25 = add nsw i32 %22, 1
+ store i32 %inc.i.i.i.i25, ptr addrspace(1) %arrayidx.i.i.i.i.i23, align 4
+ %add.i.i26 = add i64 %storemerge.i.i20, %4
+ br label %for.cond.i.i19
+
+lexit4: ; preds = %for.cond.i.i19
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz27.i, label %wg_leader17.i, label %wg_cf18.i
+
+wg_leader17.i: ; preds = %lexit4 ; leader advances the loop counter
+ %23 = load i32, ptr addrspace(3) @GCnt2, align 4
+ %inc.i = add nsw i32 %23, 1
+ store i32 %inc.i, ptr addrspace(3) @GCnt2, align 4
+ br label %wg_cf18.i
+
+wg_cf18.i: ; preds = %wg_leader17.i, %lexit4
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br label %for.cond.i
+
+lexit2: ; preds = %wg_cf9.i
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67)
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) ; optimizer hint; used in the inner loops to assert the global id fits in 31 bits
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @test1(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) {
+entry:
+ %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8
+ %0 = load i64, ptr %_arg_dev_ptr1, align 8
+ %1 = load i64, ptr %_arg_dev_ptr2, align 8
+ %2 = load i64, ptr %_arg_dev_ptr3, align 8
+ store i64 %2, ptr addrspace(3) @GKernel2, align 8
+ store i64 %0, ptr addrspace(3) undef, align 8
+ store i64 %1, ptr addrspace(3) undef, align 8
+ %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+ store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+ %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+ %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+ %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+ %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67)
+ store i64 %3, ptr %agg.tmp67, align 1
+ %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+ store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+ store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+ store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+ %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %cmpz15.i = icmp eq i64 %7, 0
+ br i1 %cmpz15.i, label %leader.i, label %merge.i
+
+leader.i: ; preds = %entry
+ call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.11, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false)
+ br label %merge.i
+
+merge.i: ; preds = %leader.i, %entry
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.11, i64 32, i1 false)
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i: ; preds = %merge.i
+ %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+ store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAscast3, align 8
+ store i32 0, ptr addrspace(3) @GCnt3, align 4
+ br label %wg_cf.i
+
+wg_cf.i: ; preds = %wg_leader.i, %merge.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %8 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32
+ %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32
+ %cmp.i.i.i.i.i.i = icmp ult i64 %8, 2147483648
+ br label %for.cond.i
+
+for.cond.i: ; preds = %wg_cf11.i, %wg_cf.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %wg_leader4.i, label %wg_cf5.i
+
+wg_leader4.i: ; preds = %for.cond.i
+ %10 = load i32, ptr addrspace(3) @GCnt3, align 4
+ %cmp.i = icmp slt i32 %10, 2
+ store i1 %cmp.i, ptr addrspace(3) @GCmp3, align 1
+ br label %wg_cf5.i
+
+wg_cf5.i: ; preds = %wg_leader4.i, %for.cond.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp3, align 1
+ br i1 %wg_val_cmp.i, label %for.body.i, label %lexit6
+
+for.body.i: ; preds = %wg_cf5.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i: ; preds = %for.body.i
+ store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel2 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.10.0, align 8
+ store i64 5, ptr addrspace(3) @WGCopy.9.0, align 8
+ br label %LeaderMat.i
+
+LeaderMat.i: ; preds = %TestMat.i, %for.body.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %11 = load i64, ptr addrspace(3) @WGCopy.9.0, align 8
+ %12 = load i64, ptr addrspace(3) @WGCopy.10.0, align 8
+ %13 = inttoptr i64 %12 to ptr addrspace(4)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ %14 = getelementptr inbounds i8, ptr addrspace(4) %13, i64 24
+ br label %for.cond.i.i
+
+for.cond.i.i: ; preds = %for.body.i.i, %LeaderMat.i
+ %storemerge.i.i = phi i64 [ %9, %LeaderMat.i ], [ %add.i.i, %for.body.i.i ]
+ %cmp.i.i = icmp ult i64 %storemerge.i.i, %11
+ br i1 %cmp.i.i, label %for.body.i.i, label %lexit7
+
+for.body.i.i: ; preds = %for.cond.i.i
+ %cmp5.not.i.i.i.i.i.i = icmp ne i64 %storemerge.i.i, %9
+ %cond.i.i.i.i = zext i1 %cmp5.not.i.i.i.i.i.i to i32
+ call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+ %15 = load ptr addrspace(1), ptr addrspace(4) %14, align 8
+ %arrayidx.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %15, i64 %8
+ %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+ %add.i.i.i.i = add nsw i32 %16, %cond.i.i.i.i
+ store i32 %add.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+ %add.i.i = add i64 %storemerge.i.i, %4
+ br label %for.cond.i.i
+
+lexit7: ; preds = %for.cond.i.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz15.i, label %wg_leader10.i, label %wg_cf11.i
+
+wg_leader10.i: ; preds = %lexit7
+ %17 = load i32, ptr addrspace(3) @GCnt3, align 4
+ %inc.i = add nsw i32 %17, 1
+ store i32 %inc.i, ptr addrspace(3) @GCnt3, align 4
+ br label %wg_cf11.i
+
+wg_cf11.i: ; preds = %wg_leader10.i, %lexit7
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br label %for.cond.i
+
+lexit6: ; preds = %wg_cf5.i
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67)
+ ret void
+}
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+;
+; @test2: SPIR-V type-inference regression input (hierarchical parallelism).
+; The kernel packs eight i64 built-in values into a stack aggregate
+; (%agg.tmp67), lets work-item 0 -- selected via %cmpz32.i on
+; @__spirv_BuiltInLocalInvocationIndex -- publish it through the
+; addrspace(3) shadow @ArgShadow.17, then runs a barrier-synchronized loop
+; nest over per-iteration state kept in the @WGCopy.* addrspace(3) globals.
+; NOTE(review): compiler-generated IR -- kept byte-identical; comments only.
+define weak_odr dso_local spir_kernel void @test2(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) {
+entry:
+ %priv.i = alloca %"class.sycl::_V1::private_memory", align 4
+ %agg.tmp67 = alloca %"class.sycl::_V1::group.15", align 8
+ %0 = load i64, ptr %_arg_dev_ptr1, align 8
+ %1 = load i64, ptr %_arg_dev_ptr2, align 8
+ %2 = load i64, ptr %_arg_dev_ptr3, align 8
+ store i64 %2, ptr addrspace(3) @GKernel3, align 8
+ store i64 %0, ptr addrspace(3) undef, align 8
+ store i64 %1, ptr addrspace(3) undef, align 8
+ %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+ store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+ %3 = load i64, ptr addrspace(1) undef, align 8
+ %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+ %5 = load i64, ptr addrspace(1) undef, align 8
+ %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+ %7 = load i64, ptr addrspace(1) undef, align 8
+ %8 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+ %9 = load i64, ptr addrspace(1) undef, align 8
+ %10 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+ call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %agg.tmp67)
+; Pack the eight i64 values above into the 64-byte group aggregate at
+; 8-byte offsets (SROA-expanded stores).
+ store i64 %3, ptr %agg.tmp67, align 1
+ %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+ store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+ store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+ store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.5.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 32
+ store i64 %7, ptr %agg.tmp6.sroa.5.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.6.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 40
+ store i64 %8, ptr %agg.tmp6.sroa.6.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.7.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 48
+ store i64 %9, ptr %agg.tmp6.sroa.7.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.8.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 56
+ store i64 %10, ptr %agg.tmp6.sroa.8.0.agg.tmp67.sroa_idx, align 1
+ %11 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %cmpz32.i = icmp eq i64 %11, 0
+ br i1 %cmpz32.i, label %leader.i, label %merge.i
+
+; Only the leader (local invocation index 0) copies the packed aggregate to
+; the local-memory shadow; the barrier in %merge.i publishes it and every
+; work-item then copies it back so all hold identical group state.
+leader.i: ; preds = %entry
+ call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(64) @ArgShadow.17, ptr noundef nonnull align 8 dereferenceable(64) %agg.tmp67, i64 64, i1 false)
+ br label %merge.i
+
+merge.i: ; preds = %leader.i, %entry
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(64) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(64) @ArgShadow.17, i64 64, i1 false)
+ %priv.ascast.i = addrspacecast ptr %priv.i to ptr addrspace(4)
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz32.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i: ; preds = %merge.i
+ %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+ store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast4, align 8
+ call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %priv.i)
+ store i32 0, ptr addrspace(3) @GCnt4, align 4
+ br label %wg_cf.i
+
+wg_cf.i: ; preds = %wg_leader.i, %merge.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %12 = load i64, ptr addrspace(1) undef, align 8
+ %13 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32
+ %14 = load i64, ptr addrspace(1) undef, align 8
+ %15 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32
+ %mul.i.i.i.i.i.i = mul i64 %12, %4
+ %add.i.i.i.i.i.i = add i64 %mul.i.i.i.i.i.i, %13
+ %cmp.i.i.i.i.i.i = icmp ult i64 %add.i.i.i.i.i.i, 2147483648
+ %conv.i.i.i.i.i = trunc i64 %add.i.i.i.i.i.i to i32
+ %y.i.i.i.i.i = getelementptr inbounds i8, ptr %priv.i, i64 4
+ br label %for.cond.i
+
+; Loop-carried SROA pieces of the aggregate arguments.  The mixed i64 /
+; ptr addrspace(4) phis below become OpPhi's whose result/operand types the
+; parent commit's call-lowering / type-inference fix is meant to validate
+; (see commit message) -- do not "clean up" the undef incoming values.
+for.cond.i: ; preds = %wg_cf20.i, %wg_cf.i
+ %agg.tmp6.i.sroa.9.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp6.i.sroa.9.0.copyload40, %wg_cf20.i ]
+ %agg.tmp5.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp5.i.sroa.0.0.copyload44, %wg_cf20.i ]
+ %agg.tmp5.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp5.i.sroa.8.0.copyload48, %wg_cf20.i ]
+ %agg.tmp2.i.sroa.0.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp2.i.sroa.0.0.copyload52, %wg_cf20.i ]
+ %agg.tmp2.i.sroa.8.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp2.i.sroa.8.0.copyload56, %wg_cf20.i ]
+ %agg.tmp.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.0.0.copyload60, %wg_cf20.i ]
+ %agg.tmp.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.8.0.copyload64, %wg_cf20.i ]
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz32.i, label %wg_leader10.i, label %wg_cf11.i
+
+; Leader evaluates the loop condition (@GCnt4 < 2) and broadcasts it via
+; the i1 local @GCmp4.
+wg_leader10.i: ; preds = %for.cond.i
+ %16 = load i32, ptr addrspace(3) @GCnt4, align 4
+ %cmp.i = icmp slt i32 %16, 2
+ store i1 %cmp.i, ptr addrspace(3) @GCmp4, align 1
+ br label %wg_cf11.i
+
+wg_cf11.i: ; preds = %wg_leader10.i, %for.cond.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp4, align 1
+ br i1 %wg_val_cmp.i, label %for.body.i, label %for.end.i
+
+for.body.i: ; preds = %wg_cf11.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz32.i, label %wg_leader13.i, label %wg_cf14.i
+
+wg_leader13.i: ; preds = %for.body.i
+ br label %wg_cf14.i
+
+wg_cf14.i: ; preds = %wg_leader13.i, %for.body.i
+ %agg.tmp2.i.sroa.0.1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GKernel3 to ptr addrspace(4)), %wg_leader13.i ], [ %agg.tmp2.i.sroa.0.0, %for.body.i ]
+ %agg.tmp2.i.sroa.8.1 = phi ptr addrspace(4) [ %priv.ascast.i, %wg_leader13.i ], [ %agg.tmp2.i.sroa.8.0, %for.body.i ]
+ %agg.tmp.i.sroa.0.1 = phi i64 [ 7, %wg_leader13.i ], [ %agg.tmp.i.sroa.0.0, %for.body.i ]
+ %agg.tmp.i.sroa.8.1 = phi i64 [ 3, %wg_leader13.i ], [ %agg.tmp.i.sroa.8.0, %for.body.i ]
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz32.i, label %TestMat30.i, label %LeaderMat27.i
+
+; Leader snapshots the current loop state into the @WGCopy.* locals; after
+; the barrier in %LeaderMat27.i every work-item reloads the same snapshot.
+TestMat30.i: ; preds = %wg_cf14.i
+ store i64 %agg.tmp.i.sroa.0.1, ptr addrspace(3) @WGCopy.13.0, align 8
+ store i64 %agg.tmp.i.sroa.8.1, ptr addrspace(3) @WGCopy.13.1, align 8
+ store ptr addrspace(4) %agg.tmp2.i.sroa.0.1, ptr addrspace(3) @WGCopy.14.0, align 8
+ store ptr addrspace(4) %agg.tmp2.i.sroa.8.1, ptr addrspace(3) @WGCopy.14.1, align 8
+ store i64 %agg.tmp5.i.sroa.0.0, ptr addrspace(3) @WGCopy.15.0, align 8
+ store i64 %agg.tmp5.i.sroa.8.0, ptr addrspace(3) @WGCopy.15.1, align 8
+ store ptr addrspace(4) %priv.ascast.i, ptr addrspace(3) @WGCopy.16.0, align 8
+ store ptr addrspace(4) %agg.tmp6.i.sroa.9.0, ptr addrspace(3) @WGCopy.16.1, align 8
+ br label %LeaderMat27.i
+
+LeaderMat27.i: ; preds = %TestMat30.i, %wg_cf14.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %agg.tmp6.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.0, align 8
+ %agg.tmp6.i.sroa.9.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.1, align 8
+ %agg.tmp5.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WGCopy.15.0, align 8
+ %agg.tmp5.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WGCopy.15.1, align 8
+ %agg.tmp2.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.0, align 8
+ %agg.tmp.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WGCopy.13.0, align 8
+ %agg.tmp.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WGCopy.13.1, align 8
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ %17 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, i64 24
+ br label %for.cond.i.i
+
+; Inner 2-D loop nest: the outer induction starts at the local id (%14) and
+; steps by %5; the inner one starts at %15 and steps by %6.
+for.cond.i.i: ; preds = %lexit10, %LeaderMat27.i
+ %storemerge.i.i = phi i64 [ %14, %LeaderMat27.i ], [ %add.i.i, %lexit10 ]
+ %cmp.i.i = icmp ult i64 %storemerge.i.i, %agg.tmp.i.sroa.0.0.copyload
+ br i1 %cmp.i.i, label %for.cond.i.i.i, label %lexit11
+
+for.cond.i.i.i: ; preds = %for.body.i.i.i, %for.cond.i.i
+ %storemerge.i.i.i = phi i64 [ %add.i.i.i, %for.body.i.i.i ], [ %15, %for.cond.i.i ]
+ %cmp.i.i.i = icmp ult i64 %storemerge.i.i.i, %agg.tmp.i.sroa.8.0.copyload
+ br i1 %cmp.i.i.i, label %for.body.i.i.i, label %lexit10
+
+for.body.i.i.i: ; preds = %for.cond.i.i.i
+ call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+ %18 = load ptr addrspace(1), ptr addrspace(4) %17, align 8
+ %arrayidx.i.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %18, i64 %add.i.i.i.i.i.i
+ %19 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i.i, align 4
+ %inc.i.i.i.i.i = add nsw i32 %19, 1
+ store i32 %inc.i.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i.i, align 4
+ store i32 %conv.i.i.i.i.i, ptr %priv.i, align 4
+ store i32 5, ptr %y.i.i.i.i.i, align 4
+ %add.i.i.i = add i64 %storemerge.i.i.i, %6
+ br label %for.cond.i.i.i
+
+lexit10: ; preds = %for.cond.i.i.i
+ %add.i.i = add i64 %storemerge.i.i, %5
+ br label %for.cond.i.i
+
+lexit11: ; preds = %for.cond.i.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz32.i, label %wg_leader16.i, label %wg_cf17.i
+
+wg_leader16.i: ; preds = %lexit11
+ br label %wg_cf17.i
+
+wg_cf17.i: ; preds = %wg_leader16.i, %lexit11
+ %agg.tmp6.i.sroa.0.1 = phi ptr addrspace(4) [ %priv.ascast.i, %wg_leader16.i ], [ %agg.tmp6.i.sroa.0.0.copyload, %lexit11 ]
+ %agg.tmp6.i.sroa.9.1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GKernel3 to ptr addrspace(4)), %wg_leader16.i ], [ %agg.tmp6.i.sroa.9.0.copyload, %lexit11 ]
+ %agg.tmp5.i.sroa.0.1 = phi i64 [ 7, %wg_leader16.i ], [ %agg.tmp5.i.sroa.0.0.copyload, %lexit11 ]
+ %agg.tmp5.i.sroa.8.1 = phi i64 [ 3, %wg_leader16.i ], [ %agg.tmp5.i.sroa.8.0.copyload, %lexit11 ]
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz32.i, label %TestMat.i, label %LeaderMat.i
+
+; Second leader-snapshot / broadcast round for the second inner loop nest.
+TestMat.i: ; preds = %wg_cf17.i
+ store i64 %agg.tmp.i.sroa.0.0.copyload, ptr addrspace(3) @WGCopy.13.0, align 8
+ store i64 %agg.tmp.i.sroa.8.0.copyload, ptr addrspace(3) @WGCopy.13.1, align 8
+ store ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, ptr addrspace(3) @WGCopy.14.0, align 8
+ store ptr addrspace(4) %priv.ascast.i, ptr addrspace(3) @WGCopy.14.1, align 8
+ store i64 %agg.tmp5.i.sroa.0.1, ptr addrspace(3) @WGCopy.15.0, align 8
+ store i64 %agg.tmp5.i.sroa.8.1, ptr addrspace(3) @WGCopy.15.1, align 8
+ store ptr addrspace(4) %agg.tmp6.i.sroa.0.1, ptr addrspace(3) @WGCopy.16.0, align 8
+ store ptr addrspace(4) %agg.tmp6.i.sroa.9.1, ptr addrspace(3) @WGCopy.16.1, align 8
+ br label %LeaderMat.i
+
+LeaderMat.i: ; preds = %TestMat.i, %wg_cf17.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %agg.tmp6.i.sroa.9.0.copyload40 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.1, align 8
+ %agg.tmp5.i.sroa.0.0.copyload44 = load i64, ptr addrspace(3) @WGCopy.15.0, align 8
+ %agg.tmp5.i.sroa.8.0.copyload48 = load i64, ptr addrspace(3) @WGCopy.15.1, align 8
+ %agg.tmp2.i.sroa.0.0.copyload52 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.0, align 8
+ %agg.tmp2.i.sroa.8.0.copyload56 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.1, align 8
+ %agg.tmp.i.sroa.0.0.copyload60 = load i64, ptr addrspace(3) @WGCopy.13.0, align 8
+ %agg.tmp.i.sroa.8.0.copyload64 = load i64, ptr addrspace(3) @WGCopy.13.1, align 8
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ %20 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp6.i.sroa.9.0.copyload40, i64 24
+ br label %for.cond.i.i25
+
+for.cond.i.i25: ; preds = %lexit12, %LeaderMat.i
+ %storemerge.i.i26 = phi i64 [ %14, %LeaderMat.i ], [ %add.i.i31, %lexit12 ]
+ %cmp.i.i27 = icmp ult i64 %storemerge.i.i26, %agg.tmp5.i.sroa.0.0.copyload44
+ br i1 %cmp.i.i27, label %for.cond.i.i.i28, label %lexit13
+
+for.cond.i.i.i28: ; preds = %for.body.i.i.i32, %for.cond.i.i25
+ %storemerge.i.i.i29 = phi i64 [ %add.i.i.i35, %for.body.i.i.i32 ], [ %15, %for.cond.i.i25 ]
+ %cmp.i.i.i30 = icmp ult i64 %storemerge.i.i.i29, %agg.tmp5.i.sroa.8.0.copyload48
+ br i1 %cmp.i.i.i30, label %for.body.i.i.i32, label %lexit12
+
+for.body.i.i.i32: ; preds = %for.cond.i.i.i28
+ %21 = load i32, ptr %priv.i, align 4
+ %22 = load i32, ptr %y.i.i.i.i.i, align 4
+ %add.i.i.i.i.i = add nsw i32 %21, %22
+ call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+ %23 = load ptr addrspace(1), ptr addrspace(4) %20, align 8
+ %arrayidx.i.i.i.i.i.i33 = getelementptr inbounds i32, ptr addrspace(1) %23, i64 %add.i.i.i.i.i.i
+ %24 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i.i33, align 4
+ %add4.i.i.i.i.i = add nsw i32 %24, %add.i.i.i.i.i
+ store i32 %add4.i.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i.i33, align 4
+ %add.i.i.i35 = add i64 %storemerge.i.i.i29, %6
+ br label %for.cond.i.i.i28
+
+lexit12: ; preds = %for.cond.i.i.i28
+ %add.i.i31 = add i64 %storemerge.i.i26, %5
+ br label %for.cond.i.i25
+
+lexit13: ; preds = %for.cond.i.i25
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz32.i, label %wg_leader19.i, label %wg_cf20.i
+
+; Leader increments the shared trip counter @GCnt4 before looping back.
+wg_leader19.i: ; preds = %lexit13
+ %25 = load i32, ptr addrspace(3) @GCnt4, align 4
+ %inc.i = add nsw i32 %25, 1
+ store i32 %inc.i, ptr addrspace(3) @GCnt4, align 4
+ br label %wg_cf20.i
+
+wg_cf20.i: ; preds = %wg_leader19.i, %lexit13
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br label %for.cond.i
+
+for.end.i: ; preds = %wg_cf11.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz32.i, label %wg_leader22.i, label %lexit14
+
+wg_leader22.i: ; preds = %for.end.i
+ call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %priv.i)
+ br label %lexit14
+
+lexit14: ; preds = %wg_leader22.i, %for.end.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %agg.tmp67)
+ ret void
+}
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+;
+; @test3: companion SPIR-V type-inference input.  Same leader/broadcast shape
+; as the other kernels in this file: work-item 0 (selected via %cmpz16.i on
+; @__spirv_BuiltInLocalInvocationIndex) publishes a 32-byte group aggregate
+; through @ArgShadow.21, then the work-group loop broadcasts per-iteration
+; pointers/sizes via the @WGCopy.19/@WGCopy.20 addrspace(3) locals.
+; NOTE(review): compiler-generated IR -- kept byte-identical; comments only.
+define weak_odr dso_local spir_kernel void @test3(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) {
+entry:
+ %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8
+ %0 = load i64, ptr %_arg_dev_ptr1, align 8
+ %1 = load i64, ptr %_arg_dev_ptr2, align 8
+ %2 = load i64, ptr %_arg_dev_ptr3, align 8
+ store i64 %2, ptr addrspace(3) @GKernel4, align 8
+ store i64 %0, ptr addrspace(3) undef, align 8
+ store i64 %1, ptr addrspace(3) undef, align 8
+ %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+ store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+ %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+ %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+ %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+ %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67)
+; Pack the four built-in i64 values into the 32-byte group aggregate.
+ store i64 %3, ptr %agg.tmp67, align 1
+ %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+ store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+ store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+ %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+ store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+ %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %cmpz16.i = icmp eq i64 %7, 0
+ br i1 %cmpz16.i, label %leader.i, label %merge.i
+
+; Leader copies the aggregate to the local-memory shadow; after the barrier
+; in %merge.i every work-item copies it back.
+leader.i: ; preds = %entry
+ call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.21, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false)
+ br label %merge.i
+
+merge.i: ; preds = %leader.i, %entry
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.21, i64 32, i1 false)
+ tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz16.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i: ; preds = %merge.i
+ %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+ store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast5, align 8
+ store i32 0, ptr addrspace(3) @GCnt5, align 4
+ br label %wg_cf.i
+
+wg_cf.i: ; preds = %wg_leader.i, %merge.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %wg_val_g.ascast.i = load ptr addrspace(4), ptr addrspace(3) @GAsCast5, align 8
+ %8 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32
+ %9 = trunc i64 %4 to i32
+ br label %for.cond.i
+
+for.cond.i: ; preds = %wg_cf12.i, %wg_cf.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz16.i, label %wg_leader5.i, label %wg_cf6.i
+
+; Leader evaluates the loop condition (@GCnt5 < 2) and broadcasts it via
+; the i1 local @GCmp5.
+wg_leader5.i: ; preds = %for.cond.i
+ %10 = load i32, ptr addrspace(3) @GCnt5, align 4
+ %cmp.i = icmp slt i32 %10, 2
+ store i1 %cmp.i, ptr addrspace(3) @GCmp5, align 1
+ br label %wg_cf6.i
+
+wg_cf6.i: ; preds = %wg_leader5.i, %for.cond.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp5, align 1
+ br i1 %wg_val_cmp.i, label %for.body.i, label %lexit20
+
+for.body.i: ; preds = %wg_cf6.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz16.i, label %TestMat.i, label %LeaderMat.i
+
+; Leader snapshots two addrspace(4) pointers and a size into the @WGCopy.*
+; locals; all work-items reload them after the barrier in %LeaderMat.i.
+TestMat.i: ; preds = %for.body.i
+ store ptr addrspace(4) %wg_val_g.ascast.i, ptr addrspace(3) @WGCopy.20.0, align 8
+ store ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel4 to ptr addrspace(4)), ptr addrspace(3) @WGCopy.20.1, align 8
+ store i64 5, ptr addrspace(3) @WGCopy.19.0, align 8
+ br label %LeaderMat.i
+
+LeaderMat.i: ; preds = %TestMat.i, %for.body.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ %11 = load i64, ptr addrspace(3) @WGCopy.19.0, align 8
+ %agg.tmp2.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.20.0, align 8
+ %agg.tmp2.i.sroa.6.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.20.1, align 8
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ %index.i.i.i.i.i = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, i64 24
+ %12 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.6.0.copyload, i64 24
+ %13 = trunc i64 %11 to i32
+ br label %for.cond.i.i
+
+; Work-item loop: induction starts at the local id (%8), steps by the
+; work-group size (%4), bounded by the broadcast trip count %11.
+for.cond.i.i: ; preds = %for.body.i.i, %LeaderMat.i
+ %storemerge.i.i = phi i64 [ %8, %LeaderMat.i ], [ %add.i.i, %for.body.i.i ]
+ %cmp.i.i = icmp ult i64 %storemerge.i.i, %11
+ br i1 %cmp.i.i, label %for.body.i.i, label %lexit21
+
+for.body.i.i: ; preds = %for.cond.i.i
+ %14 = load i64, ptr addrspace(4) %index.i.i.i.i.i, align 8
+ %mul.i.i.i.i = mul i64 %14, 10
+ %mul3.i.i.i.i = shl i64 %storemerge.i.i, 1
+ %add.i.i.i.i = add i64 %mul.i.i.i.i, %mul3.i.i.i.i
+ %15 = load ptr addrspace(1), ptr addrspace(4) %12, align 8
+ %arrayidx.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %15, i64 %add.i.i.i.i
+ %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+ %conv9.i.i.i.i = add i32 %16, %13
+ store i32 %conv9.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+ %add14.i.i.i.i = or disjoint i64 %add.i.i.i.i, 1
+ %17 = load ptr addrspace(1), ptr addrspace(4) %12, align 8
+ %arrayidx.i25.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %17, i64 %add14.i.i.i.i
+ %18 = load i32, ptr addrspace(1) %arrayidx.i25.i.i.i.i, align 4
+ %conv18.i.i.i.i = add i32 %18, %9
+ store i32 %conv18.i.i.i.i, ptr addrspace(1) %arrayidx.i25.i.i.i.i, align 4
+ %add.i.i = add i64 %storemerge.i.i, %4
+ br label %for.cond.i.i
+
+lexit21: ; preds = %for.cond.i.i
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br i1 %cmpz16.i, label %wg_leader11.i, label %wg_cf12.i
+
+; Leader increments the shared trip counter @GCnt5 before looping back.
+wg_leader11.i: ; preds = %lexit21
+ %19 = load i32, ptr addrspace(3) @GCnt5, align 4
+ %inc.i = add nsw i32 %19, 1
+ store i32 %inc.i, ptr addrspace(3) @GCnt5, align 4
+ br label %wg_cf12.i
+
+wg_cf12.i: ; preds = %wg_leader11.i, %lexit21
+ call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+ br label %for.cond.i
+
+lexit20: ; preds = %wg_cf6.i
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67)
+ ret void
+}
More information about the llvm-commits
mailing list