[llvm] [SPIRV] Improve type inference of operand presented by opaque pointers and aggregate types (PR #98035)

Vyacheslav Levytskyy via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 9 08:40:34 PDT 2024


https://github.com/VyacheslavLevytskyy updated https://github.com/llvm/llvm-project/pull/98035

>From 475f2f95a1e7039bd9ca6945340a4db1d24976db Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Mon, 8 Jul 2024 08:44:44 -0700
Subject: [PATCH 1/5] improve type inference of operand presented by opaque
 pointers and aggregate types

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 167 +++++++++++++-----
 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h   |  16 ++
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |   2 +-
 llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp  |   5 +-
 llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp   |   4 +-
 .../Target/SPIRV/SPIRVPrepareFunctions.cpp    |   5 +
 llvm/lib/Target/SPIRV/SPIRVUtils.h            |  54 ++++++
 .../SPIRV/SpecConstants/restore-spec-type.ll  |  46 +++++
 .../CodeGen/SPIRV/instructions/atomic-ptr.ll  |  28 +++
 .../transcoding/OpGroupAsyncCopy-strided.ll   |   2 +-
 .../SPIRV/transcoding/spirv-event-null.ll     |   2 +-
 11 files changed, 276 insertions(+), 55 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/SpecConstants/restore-spec-type.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 566eafd41e9bd..03f5c197958af 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -71,17 +71,23 @@ class SPIRVEmitIntrinsics
 
   // deduce element type of untyped pointers
   Type *deduceElementType(Value *I, bool UnknownElemTypeI8);
-  Type *deduceElementTypeHelper(Value *I);
-  Type *deduceElementTypeHelper(Value *I, std::unordered_set<Value *> &Visited);
+  Type *deduceElementTypeHelper(Value *I, bool UnknownElemTypeI8);
+  Type *deduceElementTypeHelper(Value *I, std::unordered_set<Value *> &Visited,
+                                bool UnknownElemTypeI8);
+  Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
+                                     bool UnknownElemTypeI8);
   Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
-                                     std::unordered_set<Value *> &Visited);
+                                     std::unordered_set<Value *> &Visited,
+                                     bool UnknownElemTypeI8);
   Type *deduceElementTypeByUsersDeep(Value *Op,
-                                     std::unordered_set<Value *> &Visited);
+                                     std::unordered_set<Value *> &Visited,
+                                     bool UnknownElemTypeI8);
 
   // deduce nested types of composites
-  Type *deduceNestedTypeHelper(User *U);
+  Type *deduceNestedTypeHelper(User *U, bool UnknownElemTypeI8);
   Type *deduceNestedTypeHelper(User *U, Type *Ty,
-                               std::unordered_set<Value *> &Visited);
+                               std::unordered_set<Value *> &Visited,
+                               bool UnknownElemTypeI8);
 
   // deduce Types of operands of the Instruction if possible
   void deduceOperandElementType(Instruction *I);
@@ -223,6 +229,18 @@ static inline void reportFatalOnTokenType(const Instruction *I) {
                        false);
 }
 
+// maybe restore original function return type
+static inline Type *restoreMutatedType(SPIRVGlobalRegistry *GR, Instruction *I,
+                                       Type *Ty) {
+  CallInst *CI = dyn_cast<CallInst>(I);
+  if (!CI || CI->isIndirectCall() || CI->isInlineAsm() ||
+      !CI->getCalledFunction() || CI->getCalledFunction()->isIntrinsic())
+    return Ty;
+  if (Type *OriginalTy = GR->findMutated(CI->getCalledFunction()))
+    return OriginalTy;
+  return Ty;
+}
+
 void SPIRVEmitIntrinsics::buildAssignType(IRBuilder<> &B, Type *Ty,
                                           Value *Arg) {
   Value *OfType = PoisonValue::get(Ty);
@@ -263,15 +281,26 @@ void SPIRVEmitIntrinsics::updateAssignType(CallInst *AssignCI, Value *Arg,
 
 // Set element pointer type to the given value of ValueTy and tries to
 // specify this type further (recursively) by Operand value, if needed.
+Type *
+SPIRVEmitIntrinsics::deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
+                                                  bool UnknownElemTypeI8) {
+  std::unordered_set<Value *> Visited;
+  return deduceElementTypeByValueDeep(ValueTy, Operand, Visited,
+                                      UnknownElemTypeI8);
+}
+
 Type *SPIRVEmitIntrinsics::deduceElementTypeByValueDeep(
-    Type *ValueTy, Value *Operand, std::unordered_set<Value *> &Visited) {
+    Type *ValueTy, Value *Operand, std::unordered_set<Value *> &Visited,
+    bool UnknownElemTypeI8) {
   Type *Ty = ValueTy;
   if (Operand) {
     if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
-      if (Type *NestedTy = deduceElementTypeHelper(Operand, Visited))
-        Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
+      if (Type *NestedTy =
+              deduceElementTypeHelper(Operand, Visited, UnknownElemTypeI8))
+        Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
     } else {
-      Ty = deduceNestedTypeHelper(dyn_cast<User>(Operand), Ty, Visited);
+      Ty = deduceNestedTypeHelper(dyn_cast<User>(Operand), Ty, Visited,
+                                  UnknownElemTypeI8);
     }
   }
   return Ty;
@@ -279,12 +308,12 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeByValueDeep(
 
 // Traverse User instructions to deduce an element pointer type of the operand.
 Type *SPIRVEmitIntrinsics::deduceElementTypeByUsersDeep(
-    Value *Op, std::unordered_set<Value *> &Visited) {
+    Value *Op, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8) {
   if (!Op || !isPointerTy(Op->getType()))
     return nullptr;
 
-  if (auto PType = dyn_cast<TypedPointerType>(Op->getType()))
-    return PType->getElementType();
+  if (auto ElemTy = getPointeeType(Op->getType()))
+    return ElemTy;
 
   // maybe we already know operand's element type
   if (Type *KnownTy = GR->findDeducedElementType(Op))
@@ -292,7 +321,7 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeByUsersDeep(
 
   for (User *OpU : Op->users()) {
     if (Instruction *Inst = dyn_cast<Instruction>(OpU)) {
-      if (Type *Ty = deduceElementTypeHelper(Inst, Visited))
+      if (Type *Ty = deduceElementTypeHelper(Inst, Visited, UnknownElemTypeI8))
         return Ty;
     }
   }
@@ -314,13 +343,14 @@ static Type *getPointeeTypeByCallInst(StringRef DemangledName,
 
 // Deduce and return a successfully deduced Type of the Instruction,
 // or nullptr otherwise.
-Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(Value *I) {
+Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(Value *I,
+                                                   bool UnknownElemTypeI8) {
   std::unordered_set<Value *> Visited;
-  return deduceElementTypeHelper(I, Visited);
+  return deduceElementTypeHelper(I, Visited, UnknownElemTypeI8);
 }
 
 Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
-    Value *I, std::unordered_set<Value *> &Visited) {
+    Value *I, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8) {
   // allow to pass nullptr as an argument
   if (!I)
     return nullptr;
@@ -338,34 +368,46 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
   Type *Ty = nullptr;
   // look for known basic patterns of type inference
   if (auto *Ref = dyn_cast<AllocaInst>(I)) {
-    Ty = Ref->getAllocatedType();
+    Type *RefTy = Ref->getAllocatedType();
+    if (UnknownElemTypeI8 || !isUntypedPointerTy(RefTy))
+      Ty = RefTy;
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
     Ty = Ref->getResultElementType();
   } else if (auto *Ref = dyn_cast<GlobalValue>(I)) {
     Ty = deduceElementTypeByValueDeep(
         Ref->getValueType(),
-        Ref->getNumOperands() > 0 ? Ref->getOperand(0) : nullptr, Visited);
+        Ref->getNumOperands() > 0 ? Ref->getOperand(0) : nullptr, Visited,
+        UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<AddrSpaceCastInst>(I)) {
-    Ty = deduceElementTypeHelper(Ref->getPointerOperand(), Visited);
+    Type *RefTy = deduceElementTypeHelper(Ref->getPointerOperand(), Visited,
+                                          UnknownElemTypeI8);
+    if (UnknownElemTypeI8 || !isUntypedPointerTy(RefTy))
+      Ty = RefTy;
   } else if (auto *Ref = dyn_cast<BitCastInst>(I)) {
     if (Type *Src = Ref->getSrcTy(), *Dest = Ref->getDestTy();
         isPointerTy(Src) && isPointerTy(Dest))
-      Ty = deduceElementTypeHelper(Ref->getOperand(0), Visited);
+      Ty = deduceElementTypeHelper(Ref->getOperand(0), Visited,
+                                   UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<AtomicCmpXchgInst>(I)) {
-    Value *Op = Ref->getNewValOperand();
-    Ty = deduceElementTypeByValueDeep(Op->getType(), Op, Visited);
+    Type *RefTy = deduceElementTypeHelper(Ref->getNewValOperand(), Visited,
+                                          UnknownElemTypeI8);
+    if (UnknownElemTypeI8 || !isUntypedPointerTy(RefTy))
+      Ty = RefTy;
   } else if (auto *Ref = dyn_cast<AtomicRMWInst>(I)) {
-    Value *Op = Ref->getValOperand();
-    Ty = deduceElementTypeByValueDeep(Op->getType(), Op, Visited);
+    Type *RefTy = deduceElementTypeHelper(Ref->getValOperand(), Visited,
+                                          UnknownElemTypeI8);
+    if (UnknownElemTypeI8 || !isUntypedPointerTy(RefTy))
+      Ty = RefTy;
   } else if (auto *Ref = dyn_cast<PHINode>(I)) {
     for (unsigned i = 0; i < Ref->getNumIncomingValues(); i++) {
-      Ty = deduceElementTypeByUsersDeep(Ref->getIncomingValue(i), Visited);
+      Ty = deduceElementTypeByUsersDeep(Ref->getIncomingValue(i), Visited,
+                                        UnknownElemTypeI8);
       if (Ty)
         break;
     }
   } else if (auto *Ref = dyn_cast<SelectInst>(I)) {
     for (Value *Op : {Ref->getTrueValue(), Ref->getFalseValue()}) {
-      Ty = deduceElementTypeByUsersDeep(Op, Visited);
+      Ty = deduceElementTypeByUsersDeep(Op, Visited, UnknownElemTypeI8);
       if (Ty)
         break;
     }
@@ -387,7 +429,7 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
       auto AsArgIt = ResTypeByArg.find(DemangledName);
       if (AsArgIt != ResTypeByArg.end()) {
         Ty = deduceElementTypeHelper(CI->getArgOperand(AsArgIt->second),
-                                     Visited);
+                                     Visited, UnknownElemTypeI8);
       }
     }
   }
@@ -404,13 +446,15 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
 // Re-create a type of the value if it has untyped pointer fields, also nested.
 // Return the original value type if no corrections of untyped pointer
 // information is found or needed.
-Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(User *U) {
+Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(User *U,
+                                                  bool UnknownElemTypeI8) {
   std::unordered_set<Value *> Visited;
-  return deduceNestedTypeHelper(U, U->getType(), Visited);
+  return deduceNestedTypeHelper(U, U->getType(), Visited, UnknownElemTypeI8);
 }
 
 Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
-    User *U, Type *OrigTy, std::unordered_set<Value *> &Visited) {
+    User *U, Type *OrigTy, std::unordered_set<Value *> &Visited,
+    bool UnknownElemTypeI8) {
   if (!U)
     return OrigTy;
 
@@ -432,10 +476,12 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
       Type *Ty = OpTy;
       if (Op) {
         if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
-          if (Type *NestedTy = deduceElementTypeHelper(Op, Visited))
+          if (Type *NestedTy =
+                  deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
             Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
         } else {
-          Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited);
+          Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
+                                      UnknownElemTypeI8);
         }
       }
       Tys.push_back(Ty);
@@ -451,10 +497,12 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
       Type *OpTy = ArrTy->getElementType();
       Type *Ty = OpTy;
       if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
-        if (Type *NestedTy = deduceElementTypeHelper(Op, Visited))
+        if (Type *NestedTy =
+                deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
           Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
       } else {
-        Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited);
+        Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
+                                    UnknownElemTypeI8);
       }
       if (Ty != OpTy) {
         Type *NewTy = ArrayType::get(Ty, ArrTy->getNumElements());
@@ -467,10 +515,12 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
       Type *OpTy = VecTy->getElementType();
       Type *Ty = OpTy;
       if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
-        if (Type *NestedTy = deduceElementTypeHelper(Op, Visited))
-          Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
+        if (Type *NestedTy =
+                deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
+          Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
       } else {
-        Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited);
+        Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
+                                    UnknownElemTypeI8);
       }
       if (Ty != OpTy) {
         Type *NewTy = VectorType::get(Ty, VecTy->getElementCount());
@@ -484,7 +534,7 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
 }
 
 Type *SPIRVEmitIntrinsics::deduceElementType(Value *I, bool UnknownElemTypeI8) {
-  if (Type *Ty = deduceElementTypeHelper(I))
+  if (Type *Ty = deduceElementTypeHelper(I, UnknownElemTypeI8))
     return Ty;
   return UnknownElemTypeI8 ? IntegerType::getInt8Ty(I->getContext()) : nullptr;
 }
@@ -506,6 +556,19 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
       if (isPointerTy(Op->getType()))
         Ops.push_back(std::make_pair(Op, i));
     }
+  } else if (auto *Ref = dyn_cast<AddrSpaceCastInst>(I)) {
+    KnownElemTy = GR->findDeducedElementType(I);
+    if (!KnownElemTy)
+      return;
+    Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
+  } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
+    KnownElemTy = Ref->getValueOperand()->getType();
+    if (isUntypedPointerTy(KnownElemTy))
+      return;
+    if (GR->findDeducedElementType(Ref->getPointerOperand()))
+      return;
+    Ops.push_back(std::make_pair(Ref->getPointerOperand(),
+                                 StoreInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<SelectInst>(I)) {
     if (!isPointerTy(I->getType()) ||
         !(KnownElemTy = GR->findDeducedElementType(I)))
@@ -719,7 +782,7 @@ void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) {
         I->replaceUsesOfWith(Op, CI);
         KeepInst = true;
         AggrConsts[CI] = AggrConst;
-        AggrConstTypes[CI] = deduceNestedTypeHelper(AggrConst);
+        AggrConstTypes[CI] = deduceNestedTypeHelper(AggrConst, false);
       }
     }
     if (!KeepInst)
@@ -864,8 +927,9 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
     Pointer = BC->getOperand(0);
 
   // Do not emit spv_ptrcast if Pointer's element type is ExpectedElementType
-  Type *PointerElemTy = deduceElementTypeHelper(Pointer);
-  if (PointerElemTy == ExpectedElementType)
+  Type *PointerElemTy = deduceElementTypeHelper(Pointer, false);
+  if (PointerElemTy == ExpectedElementType ||
+      isEquivalentTypes(PointerElemTy, ExpectedElementType))
     return;
 
   setInsertPointSkippingPhis(B, I);
@@ -937,8 +1001,14 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
         I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0,
         B);
   } else if (SI) {
-    return replacePointerOperandWithPtrCast(
-        I, SI->getPointerOperand(), SI->getValueOperand()->getType(), 1, B);
+    Value *Op = SI->getValueOperand();
+    Type *OpTy = Op->getType();
+    if (auto *OpI = dyn_cast<Instruction>(Op))
+      OpTy = restoreMutatedType(GR, OpI, OpTy);
+    if (OpTy == Op->getType())
+      OpTy = deduceElementTypeByValueDeep(OpTy, Op, false);
+    return replacePointerOperandWithPtrCast(I, SI->getPointerOperand(), OpTy, 1,
+                                            B);
   } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     return replacePointerOperandWithPtrCast(I, LI->getPointerOperand(),
                                             LI->getType(), 0, B);
@@ -978,7 +1048,7 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
         } else {
           for (User *U : CalledArg->users()) {
             if (Instruction *Inst = dyn_cast<Instruction>(U)) {
-              if ((ElemTy = deduceElementTypeHelper(Inst)) != nullptr)
+              if ((ElemTy = deduceElementTypeHelper(Inst, false)) != nullptr)
                 break;
             }
           }
@@ -1182,7 +1252,7 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV,
     // Deduce element type and store results in Global Registry.
     // Result is ignored, because TypedPointerType is not supported
     // by llvm IR general logic.
-    deduceElementTypeHelper(&GV);
+    deduceElementTypeHelper(&GV, false);
     Constant *Init = GV.getInitializer();
     Type *Ty = isAggrConstForceInt32(Init) ? B.getInt32Ty() : Init->getType();
     Constant *Const = isAggrConstForceInt32(Init) ? B.getInt32(1) : Init;
@@ -1230,6 +1300,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
         TypeToAssign = It->second;
       }
     }
+    TypeToAssign = restoreMutatedType(GR, I, TypeToAssign);
     buildAssignType(B, TypeToAssign, I);
   }
   for (const auto &Op : I->operands()) {
@@ -1343,7 +1414,7 @@ Type *SPIRVEmitIntrinsics::deduceFunParamElementType(
       return KnownTy;
     // try to deduce from the operand itself
     Visited.clear();
-    if (Type *Ty = deduceElementTypeHelper(OpArg, Visited))
+    if (Type *Ty = deduceElementTypeHelper(OpArg, Visited, false))
       return Ty;
     // search in actual parameter's users
     for (User *OpU : OpArg->users()) {
@@ -1351,7 +1422,7 @@ Type *SPIRVEmitIntrinsics::deduceFunParamElementType(
       if (!Inst || Inst == CI)
         continue;
       Visited.clear();
-      if (Type *Ty = deduceElementTypeHelper(Inst, Visited))
+      if (Type *Ty = deduceElementTypeHelper(Inst, Visited, false))
         return Ty;
     }
     // check if it's a formal parameter of the outer function
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index a45e1ccd0717f..e5e4733b98d91 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -51,6 +51,10 @@ class SPIRVGlobalRegistry {
   // Maps Functions to their calls (in a form of the machine instruction,
   // OpFunctionCall) that happened before the definition is available
   DenseMap<const Function *, SmallPtrSet<MachineInstr *, 8>> ForwardCalls;
+  // map a Function to its original return type before the clone function was
+  // created during substitution of aggregate arguments
+  // (see `SPIRVPrepareFunctions::removeAggregateTypesFromSignature()`)
+  DenseMap<Value *, Type *> MutatedAggRet;
 
   // Look for an equivalent of the newType in the map. Return the equivalent
   // if it's found, otherwise insert newType to the map and return the type.
@@ -163,6 +167,18 @@ class SPIRVGlobalRegistry {
     return It == AssignPtrTypeInstr.end() ? nullptr : It->second;
   }
 
+  // A registry of mutated values
+  // (see `SPIRVPrepareFunctions::removeAggregateTypesFromSignature()`):
+  // - Add a record.
+  void addMutated(Value *Val, Type *Ty) {
+    MutatedAggRet[Val] = Ty;
+  }
+  // - Find a record.
+  Type *findMutated(const Value *Val) {
+    auto It = MutatedAggRet.find(Val);
+    return It == MutatedAggRet.end() ? nullptr : It->second;
+  }
+
   // Deduced element types of untyped pointers and composites:
   // - Add a record to the map of deduced element types.
   void addDeducedElementType(Value *Val, Type *Ty) { DeducedElTys[Val] = Ty; }
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9be736ce88ce4..04def5ef01e7b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2190,7 +2190,7 @@ bool SPIRVInstructionSelector::selectGlobalValue(
   // FIXME: don't use MachineIRBuilder here, replace it with BuildMI.
   MachineIRBuilder MIRBuilder(I);
   const GlobalValue *GV = I.getOperand(1).getGlobal();
-  Type *GVType = GR.getDeducedGlobalValueType(GV);
+  Type *GVType = toTypedPointer(GR.getDeducedGlobalValueType(GV));
   SPIRVType *PointerBaseType;
   if (GVType->isArrayTy()) {
     SPIRVType *ArrayElementType =
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index 6c7c3af199652..e775f8c57b048 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -138,7 +138,8 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
       s16,   s32,   s64,   v2s16, v2s32, v2s64, v3s16,  v3s32,  v3s64,
       v4s16, v4s32, v4s64, v8s16, v8s32, v8s64, v16s16, v16s32, v16s64};
 
-  auto allFloatAndIntScalars = allIntScalars;
+  auto allFloatAndIntScalarsAndPtrs = {s8, s16, s32, s64, p0, p1,
+                                       p2, p3,  p4,  p5,  p6};
 
   auto allPtrs = {p0, p1, p2, p3, p4, p5, p6};
   auto allWritablePtrs = {p0, p1, p3, p4, p5, p6};
@@ -238,7 +239,7 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
       .legalForCartesianProduct(allFloatScalars, allWritablePtrs);
 
   getActionDefinitionsBuilder(G_ATOMICRMW_XCHG)
-      .legalForCartesianProduct(allFloatAndIntScalars, allWritablePtrs);
+      .legalForCartesianProduct(allFloatAndIntScalarsAndPtrs, allWritablePtrs);
 
   getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS).lower();
   // TODO: add proper legalization rules.
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index 0ea2f176565e6..fb379f0f644a3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -224,8 +224,8 @@ static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR,
       case TargetOpcode::G_GLOBAL_VALUE: {
         MIB.setInsertPt(*MI->getParent(), MI);
         const GlobalValue *Global = MI->getOperand(1).getGlobal();
-        Type *ElementTy = GR->getDeducedGlobalValueType(Global);
-        auto *Ty = TypedPointerType::get(toTypedPointer(ElementTy),
+        Type *ElementTy = toTypedPointer(GR->getDeducedGlobalValueType(Global));
+        auto *Ty = TypedPointerType::get(ElementTy,
                                          Global->getType()->getAddressSpace());
         SpirvTy = GR->getOrCreateSPIRVType(Ty, MIB);
         break;
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 7bee87d7204ed..29b8f8fac98e8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -536,6 +536,11 @@ SPIRVPrepareFunctions::removeAggregateTypesFromSignature(Function *F) {
       CI->mutateFunctionType(NewF->getFunctionType());
     U->replaceUsesOfWith(F, NewF);
   }
+
+  // register the mutation
+  if (RetType != F->getReturnType())
+    TM.getSubtarget<SPIRVSubtarget>(*F).getSPIRVGlobalRegistry()->addMutated(
+        NewF, F->getReturnType());
   return NewF;
 }
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 12725d6bac14a..1daea3c2c1ec7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -153,7 +153,61 @@ inline Type *reconstructFunctionType(Function *F) {
   return FunctionType::get(F->getReturnType(), ArgTys, F->isVarArg());
 }
 
+#define TYPED_PTR_TARGET_EXT_NAME "spirv.$TypedPointerType"
+inline Type *getTypedPointerWrapper(Type *ElemTy, unsigned AS) {
+  return TargetExtType::get(ElemTy->getContext(), TYPED_PTR_TARGET_EXT_NAME,
+                            {ElemTy}, {AS});
+}
+
+inline bool isTypedPointerWrapper(TargetExtType *ExtTy) {
+  return ExtTy->getName() == TYPED_PTR_TARGET_EXT_NAME &&
+         ExtTy->getNumIntParameters() == 1 &&
+         ExtTy->getNumTypeParameters() == 1;
+}
+
+inline Type *applyWrappers(Type *Ty) {
+  if (auto *ExtTy = dyn_cast<TargetExtType>(Ty)) {
+    if (isTypedPointerWrapper(ExtTy))
+      return TypedPointerType::get(applyWrappers(ExtTy->getTypeParameter(0)),
+                                   ExtTy->getIntParameter(0));
+  } else if (auto *VecTy = dyn_cast<VectorType>(Ty)) {
+    Type *ElemTy = VecTy->getElementType();
+    Type *NewElemTy = ElemTy->isTargetExtTy() ? applyWrappers(ElemTy) : ElemTy;
+    if (NewElemTy != ElemTy)
+      return VectorType::get(NewElemTy, VecTy->getElementCount());
+  }
+  return Ty;
+}
+
+inline Type *getPointeeType(Type *Ty) {
+  if (auto PType = dyn_cast<TypedPointerType>(Ty))
+    return PType->getElementType();
+  else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
+    if (isTypedPointerWrapper(ExtTy))
+      return applyWrappers(ExtTy->getTypeParameter(0));
+  return nullptr;
+}
+
+inline bool isUntypedEquivalentToTyExt(Type *Ty1, Type *Ty2) {
+  if (!isUntypedPointerTy(Ty1) || !Ty2)
+    return false;
+  if (auto *ExtTy = dyn_cast<TargetExtType>(Ty2))
+    if (isTypedPointerWrapper(ExtTy) &&
+        ExtTy->getTypeParameter(0) ==
+            IntegerType::getInt8Ty(Ty1->getContext()) &&
+        ExtTy->getIntParameter(0) == cast<PointerType>(Ty1)->getAddressSpace())
+      return true;
+  return false;
+}
+
+inline bool isEquivalentTypes(Type *Ty1, Type *Ty2) {
+  return isUntypedEquivalentToTyExt(Ty1, Ty2) ||
+         isUntypedEquivalentToTyExt(Ty2, Ty1);
+}
+
 inline Type *toTypedPointer(Type *Ty) {
+  if (Type *NewTy = applyWrappers(Ty); NewTy != Ty)
+    return NewTy;
   return isUntypedPointerTy(Ty)
              ? TypedPointerType::get(IntegerType::getInt8Ty(Ty->getContext()),
                                      getPointerAddressSpace(Ty))
diff --git a/llvm/test/CodeGen/SPIRV/SpecConstants/restore-spec-type.ll b/llvm/test/CodeGen/SPIRV/SpecConstants/restore-spec-type.ll
new file mode 100644
index 0000000000000..9e91854de1172
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/SpecConstants/restore-spec-type.ll
@@ -0,0 +1,46 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#FloatTy:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#StructTy:]] = OpTypeStruct %[[#FloatTy]]
+; CHECK-DAG: %[[#ArrayTy:]] = OpTypeArray %[[#StructTy]] %[[#]]
+; CHECK-DAG: %[[#Struct7Ty:]] = OpTypeStruct %[[#ArrayTy]]
+; CHECK-DAG: %[[#Void:]] = OpTypeVoid
+; CHECK-DAG: %[[#PtrStructTy:]] = OpTypePointer Generic %[[#StructTy]]
+; CHECK-DAG: %[[#PtrStruct7Ty:]] = OpTypePointer Generic %[[#Struct7Ty]]
+; CHECK-DAG: %[[#FunTy:]] = OpTypeFunction %[[#Void]] %[[#PtrStructTy]] %[[#PtrStruct7Ty]]
+; CHECK-DAG: %[[#Const1:]] = OpConstant %[[#FloatTy]] 1
+; CHECK-DAG: %[[#FPtrStructTy:]] = OpTypePointer Function %[[#StructTy]]
+; CHECK-DAG: %[[#Spec1:]] = OpSpecConstantComposite %[[#StructTy]] %[[#Const1]]
+; CHECK-DAG: %[[#Spec2:]] = OpSpecConstantComposite %[[#ArrayTy]] %[[#Spec1]] %[[#Spec1]]
+; CHECK-DAG: %[[#Spec3:]] = OpSpecConstantComposite %[[#Struct7Ty]] %[[#Spec2]]
+; CHECK: %[[#FunDef:]] = OpFunction %[[#Void]] None %[[#FunTy]]
+; CHECK: %[[#Arg1:]] = OpFunctionParameter %[[#PtrStructTy]]
+; CHECK: %[[#Arg2:]] = OpFunctionParameter %[[#PtrStruct7Ty]]
+; CHECK: %[[#]] = OpVariable %[[#FPtrStructTy]] Function
+; CHECK: OpStore %[[#Arg1]] %[[#Spec1]]
+; CHECK: OpStore %[[#Arg2]] %[[#Spec3]]
+; CHECK: OpFunctionEnd
+
+%Struct = type <{ float }>
+%Struct7 = type [2 x %Struct]
+%Nested = type { %Struct7 }
+
+define spir_kernel void @foo(ptr addrspace(4) %arg1, ptr addrspace(4) %arg2) {
+entry:
+  %var = alloca %Struct
+  %r1 = call %Struct @_Z29__spirv_SpecConstantComposite_1(float 1.0)
+  store %Struct %r1, ptr addrspace(4) %arg1
+  %r2 = call %Struct7 @_Z29__spirv_SpecConstantComposite_2(%Struct %r1, %Struct %r1)
+  %r3 = call %Nested @_Z29__spirv_SpecConstantComposite_3(%Struct7 %r2)
+  store %Nested %r3, ptr addrspace(4) %arg2
+
+  ret void
+}
+
+declare %Struct @_Z29__spirv_SpecConstantComposite_1(float)
+declare %Struct7 @_Z29__spirv_SpecConstantComposite_2(%Struct, %Struct)
+declare %Nested @_Z29__spirv_SpecConstantComposite_3(%Struct7)
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll
new file mode 100644
index 0000000000000..d9a1e632fe008
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll
@@ -0,0 +1,28 @@
+; The goal of the test case is to ensure that the Backend doesn't crash on the stage
+; of type inference. Result SPIR-V is not expected to be valid from the perspective
+; of spirv-val in this case, because there is a difference of accepted return types
+; between atomicrmw and OpAtomicExchange.
+
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
+
+; CHECK-DAG: %[[#CharTy:]] = OpTypeInt 8 0
+; CHECK-DAG: %[[#LongTy:]] = OpTypeInt 64 0
+; CHECK-DAG: %[[#PtrCharTy:]] = OpTypePointer CrossWorkgroup %[[#CharTy]]
+; CHECK-DAG: %[[#PtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#LongTy]]
+; CHECK-DAG: %[[#IntTy:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#Scope:]] = OpConstant %[[#IntTy]] 1
+; CHECK-DAG: %[[#MemSem:]] = OpConstant %[[#IntTy]] 8
+; CHECK-DAG: %[[#PtrPtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#PtrLongTy]]
+; CHECK: OpFunction
+; CHECK: %[[#Arg1:]] = OpFunctionParameter %[[#PtrCharTy]]
+; CHECK: %[[#Arg2:]] = OpFunctionParameter %[[#PtrLongTy]]
+; CHECK: %[[#CastedArg1:]] = OpBitcast %[[#PtrPtrLongTy]] %[[#Arg1]]
+; CHECK: OpAtomicExchange %[[#PtrLongTy]] %[[#CastedArg1]] %[[#Scope]] %[[#MemSem]] %[[#Arg2]]
+; CHECK: OpFunctionEnd
+
+define dso_local spir_func void @test_atomicrmw(ptr addrspace(1) %arg1, ptr addrspace(1) byval(i64) %arg_ptr) {
+entry:
+  %r = atomicrmw xchg ptr addrspace(1) %arg1, ptr addrspace(1) %arg_ptr acq_rel
+  ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
index 96d6016083f06..c33edf2064eb4 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
@@ -15,10 +15,10 @@
 ; CHECK-SPIRV-DAG: %[[#FunPtrEventTy:]] = OpTypePointer Function %[[#EventTy]]
 ; CHECK-SPIRV: OpFunction
 ; CHECK-SPIRV: %[[#Var:]] = OpVariable %[[#]] Function
+; CHECK-SPIRV: %[[#VarPtrEvent2:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
 ; CHECK-SPIRV: %[[#ResEvent:]] = OpGroupAsyncCopy %[[#EventTy]] %[[#Scope]] %[[#Null]] %[[#Null]] %[[#Num]] %[[#Stride]] %[[#Null]]
 ; CHECK-SPIRV: %[[#VarPtrEvent:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
 ; CHECK-SPIRV: OpStore %[[#VarPtrEvent]] %[[#ResEvent]]
-; CHECK-SPIRV: %[[#VarPtrEvent2:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
 ; CHECK-SPIRV: %[[#PtrEventGen:]] = OpPtrCastToGeneric %[[#]] %[[#VarPtrEvent2]]
 ; CHECK-SPIRV: OpGroupWaitEvents %[[#Scope]] %[[#Num]] %[[#PtrEventGen]]
 ; CHECK-SPIRV: OpFunctionEnd
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
index df11565ca8180..fcb61911e0d29 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
@@ -53,13 +53,13 @@ declare dso_local spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU
 ; CHECK: %[[#BarArg1:]] = OpFunctionParameter %[[#TyPtrSV4_W]]
 ; CHECK: %[[#BarArg2:]] = OpFunctionParameter %[[#TyPtrSV4_CW]]
 ; CHECK: %[[#EventVarBar:]] = OpVariable %[[#TyStructPtr]] Function
+; CHECK: %[[#EventVarBarCasted2:]] = OpBitcast %[[#TyEventPtr]] %[[#EventVarBar]]
 ; CHECK: %[[#SrcBar:]] = OpInBoundsPtrAccessChain %[[#TyPtrSV4_CW]] %[[#BarArg2]] %[[#]]
 ; CHECK-DAG: %[[#BarArg1Casted:]] = OpBitcast %[[#TyPtrV4_W]] %[[#BarArg1]]
 ; CHECK-DAG: %[[#SrcBarCasted:]] = OpBitcast %[[#TyPtrV4_CW]] %[[#SrcBar]]
 ; CHECK: %[[#ResBar:]] = OpGroupAsyncCopy %[[#TyEvent]] %[[#]] %[[#BarArg1Casted]] %[[#SrcBarCasted]] %[[#]] %[[#]] %[[#ConstEvent]]
 ; CHECK: %[[#EventVarBarCasted:]] = OpBitcast %[[#TyEventPtr]] %[[#EventVarBar]]
 ; CHECK: OpStore %[[#EventVarBarCasted]] %[[#ResBar]]
-; CHECK: %[[#EventVarBarCasted2:]] = OpBitcast %[[#TyEventPtr]] %[[#EventVarBar]]
 ; CHECK: %[[#EventVarBarGen:]] = OpPtrCastToGeneric %[[#TyEventPtrGen]] %[[#EventVarBarCasted2]]
 ; CHECK: OpGroupWaitEvents %[[#]] %[[#]] %[[#EventVarBarGen]]
 ; CHECK: OpFunctionEnd

>From be1dd53a12f153c5365bb17ef815b1481862c617 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Mon, 8 Jul 2024 09:42:27 -0700
Subject: [PATCH 2/5] clang-format

---
 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index e5e4733b98d91..0e26b38225f7a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -170,9 +170,7 @@ class SPIRVGlobalRegistry {
   // A registry of mutated values
   // (see `SPIRVPrepareFunctions::removeAggregateTypesFromSignature()`):
   // - Add a record.
-  void addMutated(Value *Val, Type *Ty) {
-    MutatedAggRet[Val] = Ty;
-  }
+  void addMutated(Value *Val, Type *Ty) { MutatedAggRet[Val] = Ty; }
   // - Find a record.
   Type *findMutated(const Value *Val) {
     auto It = MutatedAggRet.find(Val);

>From b6537c877516fe54d2c74915335b1934fc270441 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Mon, 8 Jul 2024 15:08:49 -0700
Subject: [PATCH 3/5] type inference for atomic instructions

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 58 ++++++++++++++---
 .../CodeGen/SPIRV/instructions/atomic-ptr.ll  | 22 +++++--
 .../test/CodeGen/SPIRV/instructions/atomic.ll | 64 ++++++++-----------
 .../SPIRV/instructions/atomic_acqrel.ll       | 64 ++++++++-----------
 .../CodeGen/SPIRV/instructions/atomic_seq.ll  | 64 ++++++++-----------
 5 files changed, 149 insertions(+), 123 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 03f5c197958af..ea42459929bd4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -389,15 +389,13 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
       Ty = deduceElementTypeHelper(Ref->getOperand(0), Visited,
                                    UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<AtomicCmpXchgInst>(I)) {
-    Type *RefTy = deduceElementTypeHelper(Ref->getNewValOperand(), Visited,
-                                          UnknownElemTypeI8);
-    if (UnknownElemTypeI8 || !isUntypedPointerTy(RefTy))
-      Ty = RefTy;
+    Value *Op = Ref->getNewValOperand();
+    if (isPointerTy(Op->getType()))
+      Ty = deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<AtomicRMWInst>(I)) {
-    Type *RefTy = deduceElementTypeHelper(Ref->getValOperand(), Visited,
-                                          UnknownElemTypeI8);
-    if (UnknownElemTypeI8 || !isUntypedPointerTy(RefTy))
-      Ty = RefTy;
+    Value *Op = Ref->getValOperand();
+    if (isPointerTy(Op->getType()))
+      Ty = deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<PHINode>(I)) {
     for (unsigned i = 0; i < Ref->getNumIncomingValues(); i++) {
       Ty = deduceElementTypeByUsersDeep(Ref->getIncomingValue(i), Visited,
@@ -539,6 +537,19 @@ Type *SPIRVEmitIntrinsics::deduceElementType(Value *I, bool UnknownElemTypeI8) {
   return UnknownElemTypeI8 ? IntegerType::getInt8Ty(I->getContext()) : nullptr;
 }
 
+static inline Type *getAtomicElemTy(SPIRVGlobalRegistry *GR, Instruction *I,
+                                    Value *PointerOperand) {
+  Type *PointeeTy = GR->findDeducedElementType(PointerOperand);
+  if (PointeeTy && !isUntypedPointerTy(PointeeTy))
+    return nullptr;
+  auto *PtrTy = dyn_cast<PointerType>(I->getType());
+  if (!PtrTy)
+    return I->getType();
+  if (Type *NestedTy = GR->findDeducedElementType(I))
+    return getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
+  return nullptr;
+}
+
 // If the Instruction has Pointer operands with unresolved types, this function
 // tries to deduce them. If the Instruction has Pointer operands with known
 // types which differ from expected, this function tries to insert a bitcast to
@@ -561,14 +572,36 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
     if (!KnownElemTy)
       return;
     Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
+  } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
+    KnownElemTy = I->getType();
+    if (isUntypedPointerTy(KnownElemTy))
+      return;
+    Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
+    if (PointeeTy && !isUntypedPointerTy(PointeeTy))
+      return;
+    Ops.push_back(std::make_pair(Ref->getPointerOperand(),
+                                 LoadInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
     KnownElemTy = Ref->getValueOperand()->getType();
     if (isUntypedPointerTy(KnownElemTy))
       return;
-    if (GR->findDeducedElementType(Ref->getPointerOperand()))
+    Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
+    if (PointeeTy && !isUntypedPointerTy(PointeeTy))
       return;
     Ops.push_back(std::make_pair(Ref->getPointerOperand(),
                                  StoreInst::getPointerOperandIndex()));
+  } else if (auto *Ref = dyn_cast<AtomicCmpXchgInst>(I)) {
+    KnownElemTy = getAtomicElemTy(GR, I, Ref->getPointerOperand());
+    if (!KnownElemTy)
+      return;
+    Ops.push_back(std::make_pair(Ref->getPointerOperand(),
+                                 AtomicCmpXchgInst::getPointerOperandIndex()));
+  } else if (auto *Ref = dyn_cast<AtomicRMWInst>(I)) {
+    KnownElemTy = getAtomicElemTy(GR, I, Ref->getPointerOperand());
+    if (!KnownElemTy)
+      return;
+    Ops.push_back(std::make_pair(Ref->getPointerOperand(),
+                                 AtomicRMWInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<SelectInst>(I)) {
     if (!isPointerTy(I->getType()) ||
         !(KnownElemTy = GR->findDeducedElementType(I)))
@@ -1512,6 +1545,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   for (auto &I : instructions(Func))
     Worklist.push_back(&I);
 
+  // SmallVector<Instruction *> Postponed;
   for (auto &I : Worklist) {
     // Don't emit intrinsincs for convergence intrinsics.
     if (isConvergenceIntrinsic(I))
@@ -1526,11 +1560,17 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
     // already, and force it to be i8 if not
     if (Postpone && !GR->findAssignPtrTypeInstr(I))
       insertAssignPtrTypeIntrs(I, B, true);
+    // Postponed.push_back(I);
   }
 
   for (auto &I : instructions(Func))
     deduceOperandElementType(&I);
 
+  //  for (auto &I : Postponed)
+  //      insertAssignPtrTypeIntrs(I, B, true);
+  //  for (auto IB = Postponed.rbegin(), IE = Postponed.rend(); IB != IE; ++IB)
+  //    insertAssignPtrTypeIntrs(*IB, B, true);
+
   for (auto *I : Worklist) {
     TrackConstants = true;
     if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll
index d9a1e632fe008..86e9be15a7c08 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll
@@ -6,23 +6,33 @@
 ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
 ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
 
-; CHECK-DAG: %[[#CharTy:]] = OpTypeInt 8 0
 ; CHECK-DAG: %[[#LongTy:]] = OpTypeInt 64 0
-; CHECK-DAG: %[[#PtrCharTy:]] = OpTypePointer CrossWorkgroup %[[#CharTy]]
 ; CHECK-DAG: %[[#PtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#LongTy]]
 ; CHECK-DAG: %[[#IntTy:]] = OpTypeInt 32 0
 ; CHECK-DAG: %[[#Scope:]] = OpConstant %[[#IntTy]] 1
 ; CHECK-DAG: %[[#MemSem:]] = OpConstant %[[#IntTy]] 8
 ; CHECK-DAG: %[[#PtrPtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#PtrLongTy]]
+
 ; CHECK: OpFunction
-; CHECK: %[[#Arg1:]] = OpFunctionParameter %[[#PtrCharTy]]
+; CHECK: %[[#Arg1:]] = OpFunctionParameter %[[#PtrPtrLongTy]]
 ; CHECK: %[[#Arg2:]] = OpFunctionParameter %[[#PtrLongTy]]
-; CHECK: %[[#CastedArg1:]] = OpBitcast %[[#PtrPtrLongTy]] %[[#Arg1]]
-; CHECK: OpAtomicExchange %[[#PtrLongTy]] %[[#CastedArg1]] %[[#Scope]] %[[#MemSem]] %[[#Arg2]]
+; CHECK: OpAtomicExchange %[[#PtrLongTy]] %[[#Arg1]] %[[#Scope]] %[[#MemSem]] %[[#Arg2]]
 ; CHECK: OpFunctionEnd
 
-define dso_local spir_func void @test_atomicrmw(ptr addrspace(1) %arg1, ptr addrspace(1) byval(i64) %arg_ptr) {
+define dso_local spir_func void @test1(ptr addrspace(1) %arg1, ptr addrspace(1) byval(i64) %arg_ptr) {
 entry:
   %r = atomicrmw xchg ptr addrspace(1) %arg1, ptr addrspace(1) %arg_ptr acq_rel
   ret void
 }
+
+; CHECK: OpFunction
+; CHECK: %[[#Arg3:]] = OpFunctionParameter %[[#PtrLongTy]]
+; CHECK: %[[#Arg4:]] = OpFunctionParameter %[[#LongTy]]
+; CHECK: OpAtomicExchange %[[#LongTy]] %[[#Arg3]] %[[#Scope]] %[[#MemSem]] %[[#Arg4]]
+; CHECK: OpFunctionEnd
+
+define dso_local spir_func void @test2(ptr addrspace(1) %arg1, i64 %arg_ptr) {
+entry:
+  %r = atomicrmw xchg ptr addrspace(1) %arg1, i64 %arg_ptr acq_rel
+  ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
index 8a19fc78238c6..474636f86df02 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
@@ -15,6 +15,7 @@
 ; CHECK-DAG: OpName [[XOR:%.*]] "test_xor"
 
 ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]]
 ; CHECK-DAG: [[I64Ty:%.*]] = OpTypeInt 64 0
 ;; Device scope is encoded with constant 1
 ; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1
@@ -22,11 +23,10 @@
 ; CHECK-DAG: [[RELAXED:%.*]] = OpConstantNull [[I32Ty]]
 
 ; CHECK:      [[ADD]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_add(i32* %ptr, i32 %val) {
@@ -35,11 +35,10 @@ define i32 @test_add(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[SUB]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_sub(i32* %ptr, i32 %val) {
@@ -48,11 +47,10 @@ define i32 @test_sub(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[MIN]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_min(i32* %ptr, i32 %val) {
@@ -61,11 +59,10 @@ define i32 @test_min(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[MAX]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_max(i32* %ptr, i32 %val) {
@@ -74,11 +71,10 @@ define i32 @test_max(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[UMIN]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_umin(i32* %ptr, i32 %val) {
@@ -87,11 +83,10 @@ define i32 @test_umin(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[UMAX]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_umax(i32* %ptr, i32 %val) {
@@ -100,11 +95,10 @@ define i32 @test_umax(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[AND]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_and(i32* %ptr, i32 %val) {
@@ -113,11 +107,10 @@ define i32 @test_and(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[OR]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_or(i32* %ptr, i32 %val) {
@@ -126,11 +119,10 @@ define i32 @test_or(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[XOR]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_xor(i32* %ptr, i32 %val) {
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll
index 950dfe417637f..d0c4531a75b65 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll
@@ -12,17 +12,17 @@
 ; CHECK-DAG: OpName [[XOR:%.*]] "test_xor"
 
 ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]]
 ;; Device scope is encoded with constant 1
 ; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1
 ;; "acq_rel" maps to the constant 8
 ; CHECK-DAG: [[ACQREL:%.*]] = OpConstant [[I32Ty]] 8
 
 ; CHECK:      [[ADD]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_add(i32* %ptr, i32 %val) {
@@ -31,11 +31,10 @@ define i32 @test_add(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[SUB]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_sub(i32* %ptr, i32 %val) {
@@ -44,11 +43,10 @@ define i32 @test_sub(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[MIN]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_min(i32* %ptr, i32 %val) {
@@ -57,11 +55,10 @@ define i32 @test_min(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[MAX]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_max(i32* %ptr, i32 %val) {
@@ -70,11 +67,10 @@ define i32 @test_max(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[UMIN]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_umin(i32* %ptr, i32 %val) {
@@ -83,11 +79,10 @@ define i32 @test_umin(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[UMAX]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_umax(i32* %ptr, i32 %val) {
@@ -96,11 +91,10 @@ define i32 @test_umax(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[AND]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_and(i32* %ptr, i32 %val) {
@@ -109,11 +103,10 @@ define i32 @test_and(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[OR]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_or(i32* %ptr, i32 %val) {
@@ -122,11 +115,10 @@ define i32 @test_or(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[XOR]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_xor(i32* %ptr, i32 %val) {
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll
index f142e012dcb74..fc1d6dafa1b08 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll
@@ -12,17 +12,17 @@
 ; CHECK-DAG: OpName [[XOR:%.*]] "test_xor"
 
 ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]]
 ;; Device scope is encoded with constant 1
 ; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1
 ;; "sequentially consistent" maps to constant 16
 ; CHECK-DAG: [[SEQ:%.*]] = OpConstant [[I32Ty]] 16
 
 ; CHECK:      [[ADD]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_add(i32* %ptr, i32 %val) {
@@ -31,11 +31,10 @@ define i32 @test_add(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[SUB]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_sub(i32* %ptr, i32 %val) {
@@ -44,11 +43,10 @@ define i32 @test_sub(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[MIN]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_min(i32* %ptr, i32 %val) {
@@ -57,11 +55,10 @@ define i32 @test_min(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[MAX]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_max(i32* %ptr, i32 %val) {
@@ -70,11 +67,10 @@ define i32 @test_max(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[UMIN]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_umin(i32* %ptr, i32 %val) {
@@ -83,11 +79,10 @@ define i32 @test_umin(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[UMAX]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_umax(i32* %ptr, i32 %val) {
@@ -96,11 +91,10 @@ define i32 @test_umax(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[AND]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_and(i32* %ptr, i32 %val) {
@@ -109,11 +103,10 @@ define i32 @test_and(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[OR]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_or(i32* %ptr, i32 %val) {
@@ -122,11 +115,10 @@ define i32 @test_or(i32* %ptr, i32 %val) {
 }
 
 ; CHECK:      [[XOR]] = OpFunction [[I32Ty]]
-; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter
-; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter
+; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
+; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
 ; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]]
-; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]]
 ; CHECK-NEXT: OpReturnValue [[R]]
 ; CHECK-NEXT: OpFunctionEnd
 define i32 @test_xor(i32* %ptr, i32 %val) {

>From 9612af49c816087f228c43166c585bf15d82543e Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Mon, 8 Jul 2024 15:37:53 -0700
Subject: [PATCH 4/5] type inference for atomic instructions

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 30 +++++++++++++++++++
 .../test/CodeGen/SPIRV/instructions/atomic.ll | 17 ++++++-----
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index ea42459929bd4..6f06c76a351f7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -46,6 +46,10 @@
 using namespace llvm;
 
 namespace llvm {
+namespace SPIRV {
+#define GET_BuiltinGroup_DECL
+#include "SPIRVGenTables.inc"
+} // namespace SPIRV
 void initializeSPIRVEmitIntrinsicsPass(PassRegistry &);
 } // namespace llvm
 
@@ -661,6 +665,32 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
               KnownElemTy = ElemTy; // src will rewrite dest if both are defined
             Ops.push_back(std::make_pair(Op, i));
           }
+        } else if (Grp == SPIRV::Atomic || Grp == SPIRV::AtomicFloating) {
+          if (CI->arg_size() < 2)
+            return;
+          Value *Op = CI->getArgOperand(0);
+          if (!isPointerTy(Op->getType()))
+            return;
+          switch (Opcode) {
+          case SPIRV::OpAtomicLoad:
+          case SPIRV::OpAtomicCompareExchangeWeak:
+          case SPIRV::OpAtomicCompareExchange:
+          case SPIRV::OpAtomicExchange:
+          case SPIRV::OpAtomicIAdd:
+          case SPIRV::OpAtomicISub:
+          case SPIRV::OpAtomicOr:
+          case SPIRV::OpAtomicXor:
+          case SPIRV::OpAtomicAnd:
+          case SPIRV::OpAtomicUMin:
+          case SPIRV::OpAtomicUMax:
+          case SPIRV::OpAtomicSMin:
+          case SPIRV::OpAtomicSMax: {
+            KnownElemTy = getAtomicElemTy(GR, I, Op);
+            if (!KnownElemTy)
+              return;
+            Ops.push_back(std::make_pair(Op, 0));
+          } break;
+          }
         }
       }
     }
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
index 474636f86df02..1ccbd5a61067d 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
@@ -17,6 +17,7 @@
 ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0
 ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]]
 ; CHECK-DAG: [[I64Ty:%.*]] = OpTypeInt 64 0
+; CHECK-DAG: [[PtrI64Ty:%.*]] = OpTypePointer Generic [[I64Ty]]
 ;; Device scope is encoded with constant 1
 ; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1
 ;; "monotonic" maps to the relaxed memory semantics, encoded with constant 0
@@ -131,13 +132,15 @@ define i32 @test_xor(i32* %ptr, i32 %val) {
 }
 
 ; CHECK: OpFunction
-; CHECK: [[Arg1:%.*]] = OpFunctionParameter
-; CHECK: [[Arg2:%.*]] = OpFunctionParameter
-; CHECK: OpAtomicSMin [[I64Ty]] %[[#]] [[SCOPE]] [[RELAXED]] [[Arg2]]
-; CHECK: OpAtomicSMax [[I64Ty]] %[[#]] [[SCOPE]] [[RELAXED]] [[Arg2]]
-; CHECK: OpAtomicUMin [[I64Ty]] %[[#]] [[SCOPE]] [[RELAXED]] [[Arg2]]
-; CHECK: OpAtomicUMax [[I64Ty]] %[[#]] [[SCOPE]] [[RELAXED]] [[Arg2]]
-; CHECK: OpFunctionEnd
+; CHECK-NEXT: [[Arg1:%.*]] = OpFunctionParameter [[PtrI64Ty]]
+; CHECK-NEXT: [[Arg2:%.*]] = OpFunctionParameter [[I64Ty]]
+; CHECK-NEXT: OpLabel
+; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK-NEXT: OpReturn
+; CHECK-NEXT: OpFunctionEnd
 define dso_local spir_kernel void @test_wrappers(ptr addrspace(4) %arg, i64 %val) {
   %r1 = call spir_func i64 @__spirv_AtomicSMin(ptr addrspace(4) %arg, i32 1, i32 0, i64 %val)
   %r2 = call spir_func i64 @__spirv_AtomicSMax(ptr addrspace(4) %arg, i32 1, i32 0, i64 %val)

>From f6d8b092594ea2243e49706659010425789db521 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Tue, 9 Jul 2024 08:40:13 -0700
Subject: [PATCH 5/5] improve type inference

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 103 ++++++++++----
 llvm/lib/Target/SPIRV/SPIRVUtils.h            |   2 +-
 .../pointers/type-deduce-by-call-chain.ll     |  12 +-
 .../SPIRV/pointers/type-deduce-sycl-stub.ll   | 127 ++++++++++++++++++
 .../transcoding/OpGroupAsyncCopy-strided.ll   |   2 +
 5 files changed, 211 insertions(+), 35 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/pointers/type-deduce-sycl-stub.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 6f06c76a351f7..5e7aec4965062 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -73,6 +73,9 @@ class SPIRVEmitIntrinsics
   DenseSet<Instruction *> AggrStores;
   SPIRV::InstructionSet::InstructionSet InstrSet;
 
+  // a register of Instructions that don't have a complete type definition
+  SmallVector<Instruction *> PostprocessWorklist;
+
   // deduce element type of untyped pointers
   Type *deduceElementType(Value *I, bool UnknownElemTypeI8);
   Type *deduceElementTypeHelper(Value *I, bool UnknownElemTypeI8);
@@ -86,6 +89,8 @@ class SPIRVEmitIntrinsics
   Type *deduceElementTypeByUsersDeep(Value *Op,
                                      std::unordered_set<Value *> &Visited,
                                      bool UnknownElemTypeI8);
+  void maybeAssignPtrType(Type *&Ty, Value *I, Type *RefTy,
+                          bool UnknownElemTypeI8);
 
   // deduce nested types of composites
   Type *deduceNestedTypeHelper(User *U, bool UnknownElemTypeI8);
@@ -94,7 +99,8 @@ class SPIRVEmitIntrinsics
                                bool UnknownElemTypeI8);
 
   // deduce Types of operands of the Instruction if possible
-  void deduceOperandElementType(Instruction *I);
+  void deduceOperandElementType(Instruction *I, Instruction *AskOp = 0,
+                                Type *AskTy = 0, CallInst *AssignCI = 0);
 
   void preprocessCompositeConstants(IRBuilder<> &B);
   void preprocessUndefs(IRBuilder<> &B);
@@ -161,6 +167,7 @@ class SPIRVEmitIntrinsics
 
   bool runOnModule(Module &M) override;
   bool runOnFunction(Function &F);
+  bool postprocessTypes();
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     ModulePass::getAnalysisUsage(AU);
@@ -233,6 +240,12 @@ static inline void reportFatalOnTokenType(const Instruction *I) {
                        false);
 }
 
+static bool IsKernelArgInt8(Function *F, StoreInst *SI) {
+  return SI && F->getCallingConv() == CallingConv::SPIR_KERNEL &&
+         isPointerTy(SI->getValueOperand()->getType()) &&
+         isa<Argument>(SI->getValueOperand());
+}
+
 // maybe restore original function return type
 static inline Type *restoreMutatedType(SPIRVGlobalRegistry *GR, Instruction *I,
                                        Type *Ty) {
@@ -353,6 +366,17 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(Value *I,
   return deduceElementTypeHelper(I, Visited, UnknownElemTypeI8);
 }
 
+void SPIRVEmitIntrinsics::maybeAssignPtrType(Type *&Ty, Value *Op, Type *RefTy,
+                                             bool UnknownElemTypeI8) {
+  if (isUntypedPointerTy(RefTy)) {
+    if (!UnknownElemTypeI8)
+      return;
+    if (auto *I = dyn_cast<Instruction>(Op))
+      PostprocessWorklist.push_back(I);
+  }
+  Ty = RefTy;
+}
+
 Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
     Value *I, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8) {
   // allow to pass nullptr as an argument
@@ -372,9 +396,7 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
   Type *Ty = nullptr;
   // look for known basic patterns of type inference
   if (auto *Ref = dyn_cast<AllocaInst>(I)) {
-    Type *RefTy = Ref->getAllocatedType();
-    if (UnknownElemTypeI8 || !isUntypedPointerTy(RefTy))
-      Ty = RefTy;
+    maybeAssignPtrType(Ty, I, Ref->getAllocatedType(), UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
     Ty = Ref->getResultElementType();
   } else if (auto *Ref = dyn_cast<GlobalValue>(I)) {
@@ -385,8 +407,7 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
   } else if (auto *Ref = dyn_cast<AddrSpaceCastInst>(I)) {
     Type *RefTy = deduceElementTypeHelper(Ref->getPointerOperand(), Visited,
                                           UnknownElemTypeI8);
-    if (UnknownElemTypeI8 || !isUntypedPointerTy(RefTy))
-      Ty = RefTy;
+    maybeAssignPtrType(Ty, I, RefTy, UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<BitCastInst>(I)) {
     if (Type *Src = Ref->getSrcTy(), *Dest = Ref->getDestTy();
         isPointerTy(Src) && isPointerTy(Dest))
@@ -558,7 +579,10 @@ static inline Type *getAtomicElemTy(SPIRVGlobalRegistry *GR, Instruction *I,
 // tries to deduce them. If the Instruction has Pointer operands with known
 // types which differ from expected, this function tries to insert a bitcast to
 // resolve the issue.
-void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
+void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
+                                                   Instruction *AskOp,
+                                                   Type *AskTy,
+                                                   CallInst *AskCI) {
   SmallVector<std::pair<Value *, unsigned>> Ops;
   Type *KnownElemTy = nullptr;
   // look for known basic patterns of type inference
@@ -586,9 +610,17 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
     Ops.push_back(std::make_pair(Ref->getPointerOperand(),
                                  LoadInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
-    KnownElemTy = Ref->getValueOperand()->getType();
-    if (isUntypedPointerTy(KnownElemTy))
+    if (IsKernelArgInt8(Ref->getParent()->getParent(), Ref))
       return;
+    Value *Op = Ref->getValueOperand();
+    KnownElemTy = Op->getType();
+    if (isUntypedPointerTy(KnownElemTy)) {
+      Type *NestedTy = GR->findDeducedElementType(Op);
+      if (!NestedTy)
+        return;
+      KnownElemTy =
+          getTypedPointerWrapper(NestedTy, getPointerAddressSpace(KnownElemTy));
+    }
     Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
     if (PointeeTy && !isUntypedPointerTy(PointeeTy))
       return;
@@ -704,17 +736,17 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
   IRBuilder<> B(Ctx);
   for (auto &OpIt : Ops) {
     Value *Op = OpIt.first;
-    if (Op->use_empty())
+    if (Op->use_empty() || (AskOp && Op != AskOp))
       continue;
-    Type *Ty = GR->findDeducedElementType(Op);
+    Type *Ty = AskOp ? AskTy : GR->findDeducedElementType(Op);
     if (Ty == KnownElemTy)
       continue;
-    Value *OpTyVal = Constant::getNullValue(KnownElemTy);
+    Value *OpTyVal = PoisonValue::get(KnownElemTy);
     Type *OpTy = Op->getType();
-    if (!Ty) {
+    if (!Ty || AskTy) {
       GR->addDeducedElementType(Op, KnownElemTy);
       // check if there is existing Intrinsic::spv_assign_ptr_type instruction
-      CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op);
+      CallInst *AssignCI = AskCI ? AskCI : GR->findAssignPtrTypeInstr(Op);
       if (AssignCI == nullptr) {
         Instruction *User = dyn_cast<Instruction>(Op->use_begin()->get());
         setInsertPointSkippingPhis(B, User ? User->getNextNode() : I);
@@ -1057,9 +1089,7 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
                                                          IRBuilder<> &B) {
   // Handle basic instructions:
   StoreInst *SI = dyn_cast<StoreInst>(I);
-  if (SI && F->getCallingConv() == CallingConv::SPIR_KERNEL &&
-      isPointerTy(SI->getValueOperand()->getType()) &&
-      isa<Argument>(SI->getValueOperand())) {
+  if (IsKernelArgInt8(F, SI)) {
     return replacePointerOperandWithPtrCast(
         I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0,
         B);
@@ -1575,7 +1605,6 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   for (auto &I : instructions(Func))
     Worklist.push_back(&I);
 
-  // SmallVector<Instruction *> Postponed;
   for (auto &I : Worklist) {
     // Don't emit intrinsincs for convergence intrinsics.
     if (isConvergenceIntrinsic(I))
@@ -1590,17 +1619,11 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
     // already, and force it to be i8 if not
     if (Postpone && !GR->findAssignPtrTypeInstr(I))
       insertAssignPtrTypeIntrs(I, B, true);
-    // Postponed.push_back(I);
   }
 
   for (auto &I : instructions(Func))
     deduceOperandElementType(&I);
 
-  //  for (auto &I : Postponed)
-  //      insertAssignPtrTypeIntrs(I, B, true);
-  //  for (auto IB = Postponed.rbegin(), IE = Postponed.rend(); IB != IE; ++IB)
-  //    insertAssignPtrTypeIntrs(*IB, B, true);
-
   for (auto *I : Worklist) {
     TrackConstants = true;
     if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
@@ -1621,12 +1644,38 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   return true;
 }
 
+// Try to deduce a better type for pointers to untyped ptr.
+bool SPIRVEmitIntrinsics::postprocessTypes() {
+  bool Changed = false;
+  if (!GR)
+    return Changed;
+  for (auto IB = PostprocessWorklist.rbegin(), IE = PostprocessWorklist.rend();
+       IB != IE; ++IB) {
+    CallInst *AssignCI = GR->findAssignPtrTypeInstr(*IB);
+    Type *KnownTy = GR->findDeducedElementType(*IB);
+    if (!KnownTy || !AssignCI || !isa<Instruction>(AssignCI->getArgOperand(0)))
+      continue;
+    Instruction *I = cast<Instruction>(AssignCI->getArgOperand(0));
+    for (User *U : I->users()) {
+      Instruction *Inst = dyn_cast<Instruction>(U);
+      if (!Inst || isa<IntrinsicInst>(Inst))
+        continue;
+      deduceOperandElementType(Inst, I, KnownTy, AssignCI);
+      if (KnownTy != GR->findDeducedElementType(I)) {
+        Changed = true;
+        break;
+      }
+    }
+  }
+  return Changed;
+}
+
 bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
   bool Changed = false;
 
-  for (auto &F : M) {
+  PostprocessWorklist.clear();
+  for (auto &F : M)
     Changed |= runOnFunction(F);
-  }
 
   for (auto &F : M) {
     // check if function parameter types are set
@@ -1638,6 +1687,8 @@ bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
     }
   }
 
+  Changed |= postprocessTypes();
+
   return Changed;
 }
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 1daea3c2c1ec7..c757af6b8aa72 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -108,7 +108,7 @@ Type *parseBasicTypeName(StringRef &TypeName, LLVMContext &Ctx);
 
 // True if this is an instance of TypedPointerType.
 inline bool isTypedPointerTy(const Type *T) {
-  return T->getTypeID() == Type::TypedPointerTyID;
+  return T && T->getTypeID() == Type::TypedPointerTyID;
 }
 
 // True if this is an instance of PointerType.
diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll
index b039f80860daf..f060a97a57296 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll
@@ -18,11 +18,8 @@
 ; CHECK-SPIRV-DAG: %[[TyFunPtrLong:.*]] = OpTypeFunction %[[TyVoid]] %[[TyPtrLong]]
 ; CHECK-SPIRV-DAG: %[[TyGenPtrPtrLong:.*]] = OpTypePointer Generic %[[TyGenPtrLong]]
 ; CHECK-SPIRV-DAG: %[[TyFunGenPtrLongLong:.*]] = OpTypeFunction %[[TyVoid]] %[[TyGenPtrLong]] %[[TyLong]]
-; CHECK-SPIRV-DAG: %[[TyChar:.*]] = OpTypeInt 8 0
-; CHECK-SPIRV-DAG: %[[TyGenPtrChar:.*]] = OpTypePointer Generic %[[TyChar]]
-; CHECK-SPIRV-DAG: %[[TyGenPtrPtrChar:.*]] = OpTypePointer Generic %[[TyGenPtrChar]]
-; CHECK-SPIRV-DAG: %[[TyFunPtrGenPtrChar:.*]] = OpTypePointer Function %[[TyGenPtrChar]]
 ; CHECK-SPIRV-DAG: %[[Const3:.*]] = OpConstant %[[TyLong]] 3
+; CHECK-SPIRV-DAG: %[[TyFunPtrGenPtrLong:.*]] = OpTypePointer Function %[[TyGenPtrLong]]
 
 ; CHECK-SPIRV: %[[FunTest]] = OpFunction %[[TyVoid]] None %[[TyFunPtrLong]]
 ; CHECK-SPIRV: %[[ArgCum]] = OpFunctionParameter %[[TyPtrLong]]
@@ -41,10 +38,9 @@
 ; CHECK-SPIRV: %[[StubObj]] = OpFunctionParameter %[[TyGenPtrLong]]
 ; CHECK-SPIRV: %[[MemOrder]] = OpFunctionParameter %[[TyLong]]
 
-; CHECK-SPIRV: %[[ObjectAddr:.*]] = OpVariable %[[TyFunPtrGenPtrChar]] Function
-; CHECK-SPIRV-NEXT: %[[ToGeneric:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[ObjectAddr]]
-; CHECK-SPIRV-NEXT: %[[Casted:.*]] = OpBitcast %[[TyGenPtrPtrLong]] %[[ToGeneric]]
-; CHECK-SPIRV-NEXT: OpStore %[[Casted]] %[[StubObj]]
+; CHECK-SPIRV: %[[ObjectAddr:.*]] = OpVariable %[[TyFunPtrGenPtrLong]] Function
+; CHECK-SPIRV-NEXT: %[[ToGeneric:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrLong]] %[[ObjectAddr]]
+; CHECK-SPIRV-NEXT: OpStore %[[ToGeneric]] %[[StubObj]]
 
 ; CHECK-SPIRV: %[[FooFunc]] = OpFunction %[[TyVoid]] None %[[TyFunGenPtrLongLong]]
 ; CHECK-SPIRV: %[[FooObj]] = OpFunctionParameter %[[TyGenPtrLong]]
diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-sycl-stub.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-sycl-stub.ll
new file mode 100644
index 0000000000000..63bf82ce7e2fd
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-sycl-stub.ll
@@ -0,0 +1,127 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=all --translator-compatibility-mode --avoid-spirv-capabilities=Shader %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=all --translator-compatibility-mode --avoid-spirv-capabilities=Shader %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-SPIRV-DAG: OpName %[[#F:]] "finish"
+; CHECK-SPIRV-DAG: OpName %[[#FH:]] "finish_helper"
+; CHECK-SPIRV-DAG: OpName %[[#S:]] "start"
+; CHECK-SPIRV-DAG: OpName %[[#SH:]] "start_helper"
+
+; CHECK-SPIRV-DAG: %[[#Long:]] = OpTypeInt 64 0
+; CHECK-SPIRV-DAG: %[[#FPtrLong:]] = OpTypePointer Function %[[#Long]]
+; CHECK-SPIRV-DAG: %[[#GPtrLong:]] = OpTypePointer Generic %[[#Long]]
+; CHECK-SPIRV-DAG: %[[#C3:]] = OpConstant %[[#]] 3
+; CHECK-SPIRV-DAG: %[[#Array3:]] = OpTypeArray %[[#Long]] %[[#C3]]
+; CHECK-SPIRV-DAG: %[[#PtrArray3:]] = OpTypePointer Generic %[[#Array3]]
+; CHECK-SPIRV-DAG: %[[#FPtrPtrArray3:]] = OpTypePointer Function %[[#PtrArray3]]
+; CHECK-SPIRV-DAG: %[[#GPtrPtrArray3:]] = OpTypePointer Generic %[[#PtrArray3]]
+
+; CHECK-SPIRV: %[[#FH]] = OpFunction
+; CHECK-SPIRV: %[[#Arg1:]] = OpFunctionParameter %[[#PtrArray3]]
+; CHECK-SPIRV: %[[#Arg2:]] = OpFunctionParameter %[[#Long]]
+; CHECK-SPIRV: %[[#GrpIdAddr:]] = OpVariable %[[#FPtrPtrArray3]] Function
+; CHECK-SPIRV: %[[#WIId:]] = OpVariable %[[#FPtrLong]] Function
+; CHECK-SPIRV: %[[#GenGrpIdAddr:]] = OpPtrCastToGeneric %[[#GPtrPtrArray3]] %[[#GrpIdAddr]]
+; CHECK-SPIRV: %[[#GenWIId:]] = OpPtrCastToGeneric %[[#GPtrLong]] %[[#WIId]]
+; CHECK-SPIRV: OpStore %[[#GenGrpIdAddr]] %[[#Arg1]]
+; CHECK-SPIRV: OpStore %[[#GenWIId]] %[[#Arg2]]
+; CHECK-SPIRV: OpReturn
+; CHECK-SPIRV: OpFunctionEnd
+
+ at __spirv_BuiltInWorkgroupId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+ at __spirv_BuiltInGlobalLinearId = external dso_local local_unnamed_addr addrspace(1) constant i64, align 8
+ at __spirv_BuiltInWorkgroupSize = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+
+define weak_odr dso_local spir_kernel void @foo() {
+entry:
+  call spir_func void @start()
+  call spir_func void @finish()
+  ret void
+}
+
+define dso_local spir_func void @start() {
+entry:
+  %GroupID = alloca [3 x i64], align 8
+  %call.i = tail call spir_func signext i8 @__spirv_SpecConstant(i32 noundef -9145239, i8 noundef signext 0)
+  %cmp.i.not = icmp eq i8 %call.i, 0
+  br i1 %cmp.i.not, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %GroupID.ascast = addrspacecast ptr %GroupID to ptr addrspace(4)
+  %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+  %1 = extractelement <3 x i64> %0, i64 0
+  store i64 %1, ptr %GroupID, align 8
+  %arrayinit.element = getelementptr inbounds i8, ptr %GroupID, i64 8
+  %2 = extractelement <3 x i64> %0, i64 1
+  store i64 %2, ptr %arrayinit.element, align 8
+  %arrayinit.element1 = getelementptr inbounds i8, ptr %GroupID, i64 16
+  %3 = extractelement <3 x i64> %0, i64 2
+  store i64 %3, ptr %arrayinit.element1, align 8
+  %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalLinearId, align 8
+  %5 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+  %6 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, i64 8), align 8
+  %mul = mul i64 %5, %6
+  %7 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, i64 16), align 16
+  %mul2 = mul i64 %mul, %7
+  %conv = trunc i64 %mul2 to i32
+  call spir_func void @start_helper(ptr addrspace(4) noundef %GroupID.ascast, i64 noundef %4, i32 noundef %conv)
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  ret void
+}
+
+define dso_local spir_func void @finish() {
+entry:
+  %GroupID = alloca [3 x i64], align 8
+  %call.i = tail call spir_func signext i8 @__spirv_SpecConstant(i32 noundef -9145239, i8 noundef signext 0)
+  %cmp.i.not = icmp eq i8 %call.i, 0
+  br i1 %cmp.i.not, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %GroupID.ascast = addrspacecast ptr %GroupID to ptr addrspace(4)
+  %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+  %1 = extractelement <3 x i64> %0, i64 0
+  store i64 %1, ptr %GroupID, align 8
+  %arrayinit.element = getelementptr inbounds i8, ptr %GroupID, i64 8
+  %2 = extractelement <3 x i64> %0, i64 1
+  store i64 %2, ptr %arrayinit.element, align 8
+  %arrayinit.element1 = getelementptr inbounds i8, ptr %GroupID, i64 16
+  %3 = extractelement <3 x i64> %0, i64 2
+  store i64 %3, ptr %arrayinit.element1, align 8
+  %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalLinearId, align 8
+  call spir_func void @finish_helper(ptr addrspace(4) noundef %GroupID.ascast, i64 noundef %4)
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  ret void
+}
+
+define dso_local spir_func void @start_helper(ptr addrspace(4) noundef %group_id, i64 noundef %wi_id, i32 noundef %wg_size) {
+entry:
+  %group_id.addr = alloca ptr addrspace(4), align 8
+  %wi_id.addr = alloca i64, align 8
+  %wg_size.addr = alloca i32, align 4
+  %group_id.addr.ascast = addrspacecast ptr %group_id.addr to ptr addrspace(4)
+  %wi_id.addr.ascast = addrspacecast ptr %wi_id.addr to ptr addrspace(4)
+  %wg_size.addr.ascast = addrspacecast ptr %wg_size.addr to ptr addrspace(4)
+  store ptr addrspace(4) %group_id, ptr addrspace(4) %group_id.addr.ascast, align 8
+  store i64 %wi_id, ptr addrspace(4) %wi_id.addr.ascast, align 8
+  store i32 %wg_size, ptr addrspace(4) %wg_size.addr.ascast, align 4
+  ret void
+}
+
+define dso_local spir_func void @finish_helper(ptr addrspace(4) noundef %group_id, i64 noundef %wi_id) {
+entry:
+  %group_id.addr = alloca ptr addrspace(4), align 8
+  %wi_id.addr = alloca i64, align 8
+  %group_id.addr.ascast = addrspacecast ptr %group_id.addr to ptr addrspace(4)
+  %wi_id.addr.ascast = addrspacecast ptr %wi_id.addr to ptr addrspace(4)
+  store ptr addrspace(4) %group_id, ptr addrspace(4) %group_id.addr.ascast, align 8
+  store i64 %wi_id, ptr addrspace(4) %wi_id.addr.ascast, align 8
+  ret void
+}
+
+declare dso_local spir_func signext i8 @__spirv_SpecConstant(i32 noundef, i8 noundef signext)
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
index c33edf2064eb4..4b74af143892a 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
@@ -23,6 +23,8 @@
 ; CHECK-SPIRV: OpGroupWaitEvents %[[#Scope]] %[[#Num]] %[[#PtrEventGen]]
 ; CHECK-SPIRV: OpFunctionEnd
 
+target triple = "spir64-unknown-unknown"
+
 define spir_kernel void @foo() {
   %event = alloca ptr, align 8
   %call = call spir_func ptr @_Z29async_work_group_strided_copyPU3AS3hPU3AS1Khmm9ocl_event(ptr null, ptr null, i64 123, i64 1, ptr null)



More information about the llvm-commits mailing list