[llvm] [SPIR-V] Improve type inference: fix types of return values in call lowering (PR #116609)

Vyacheslav Levytskyy via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 28 04:33:18 PST 2024


https://github.com/VyacheslavLevytskyy updated https://github.com/llvm/llvm-project/pull/116609

>From ed63e91d38634f981a214ae4fdad8a2a5003e45d Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Mon, 18 Nov 2024 04:47:27 -0800
Subject: [PATCH 01/17] Improve type inference: return values in call lowering

---
 llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp       | 18 +-----
 llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp   | 25 +++++++++
 .../SPIRV/pointers/builtin-ret-reg-type.ll    | 55 +++++++++++++++++++
 .../SPIRV/transcoding/OpGenericCastToPtr.ll   |  2 -
 4 files changed, 83 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 73dce230575d84..7f5794c63c711a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -2539,23 +2539,11 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
                                  SPIRVGlobalRegistry *GR) {
   LLVM_DEBUG(dbgs() << "Lowering builtin call: " << DemangledCall << "\n");
 
-  // SPIR-V type and return register.
-  Register ReturnRegister = OrigRet;
-  SPIRVType *ReturnType = nullptr;
-  if (OrigRetTy && !OrigRetTy->isVoidTy()) {
-    ReturnType = GR->assignTypeToVReg(OrigRetTy, ReturnRegister, MIRBuilder);
-    if (!MIRBuilder.getMRI()->getRegClassOrNull(ReturnRegister))
-      MIRBuilder.getMRI()->setRegClass(ReturnRegister,
-                                       GR->getRegClass(ReturnType));
-  } else if (OrigRetTy && OrigRetTy->isVoidTy()) {
-    ReturnRegister = MIRBuilder.getMRI()->createVirtualRegister(&IDRegClass);
-    MIRBuilder.getMRI()->setType(ReturnRegister, LLT::scalar(64));
-    ReturnType = GR->assignTypeToVReg(OrigRetTy, ReturnRegister, MIRBuilder);
-  }
-
   // Lookup the builtin in the TableGen records.
+  SPIRVType *SpvType = GR->getSPIRVTypeForVReg(OrigRet);
+  assert(SpvType && "Inconsistent return register: expected valid type info");
   std::unique_ptr<const IncomingCall> Call =
-      lookupBuiltin(DemangledCall, Set, ReturnRegister, ReturnType, Args);
+      lookupBuiltin(DemangledCall, Set, OrigRet, SpvType, Args);
 
   if (!Call) {
     LLVM_DEBUG(dbgs() << "Builtin record was not found!\n");
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 3c5397319aaf21..a7b6b0efa99551 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -539,6 +539,31 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   if (isFunctionDecl && !DemangledName.empty() &&
       (canUseGLSL || canUseOpenCL)) {
+    if (ResVReg.isValid()) {
+      if (!GR->getSPIRVTypeForVReg(ResVReg)) {
+        const Type *RetTy = OrigRetTy;
+        if (auto *PtrRetTy = dyn_cast<PointerType>(OrigRetTy)) {
+          const Value *OrigValue = Info.OrigRet.OrigValue;
+          if (!OrigValue)
+            OrigValue = Info.CB;
+          if (OrigValue)
+            if (Type *ElemTy = GR->findDeducedElementType(OrigValue))
+              RetTy =
+                  TypedPointerType::get(ElemTy, PtrRetTy->getAddressSpace());
+        }
+        SPIRVType *SpvType = GR->getOrCreateSPIRVType(RetTy, MIRBuilder);
+        GR->assignSPIRVTypeToVReg(SpvType, ResVReg, MF);
+        if (!MRI->getRegClassOrNull(ResVReg)) {
+          MRI->setRegClass(ResVReg, GR->getRegClass(SpvType));
+          MRI->setType(ResVReg, GR->getRegType(SpvType));
+        }
+      }
+    } else {
+      SPIRVType *SpvType = GR->getOrCreateSPIRVType(OrigRetTy, MIRBuilder);
+      ResVReg = MRI->createVirtualRegister(GR->getRegClass(SpvType));
+      MRI->setType(ResVReg, GR->getRegType(SpvType));
+      GR->assignSPIRVTypeToVReg(SpvType, ResVReg, MF);
+    }
     SmallVector<Register, 8> ArgVRegs;
     for (auto Arg : Info.OrigArgs) {
       assert(Arg.Regs.size() == 1 && "Call arg has multiple VRegs");
diff --git a/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll b/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll
new file mode 100644
index 00000000000000..afa97ccfc0a69c
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll
@@ -0,0 +1,55 @@
+; The goal of the test case is to ensure that correct types are applied to virtual registers which were
+; used as return values in call lowering. Pass criterion is that spirv-val considers output valid.
+
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#]] = OpPhi %[[#]] %[[#Ptr:]] %[[#]] %[[#]] %[[#]]
+; CHECK-SPIRV: %[[#Ptr]] = OpPtrAccessChain %[[#]] %[[#]] %[[#]]
+
+
+%t_half = type { half }
+%t_i17 = type { [17 x i32] }
+%t_h17 = type { [17 x %t_half] }
+
+define internal spir_func void @foo(i64 %arrayinit.cur.add_4, half %r1, ptr addrspace(4) noundef align 8 dereferenceable_or_null(72) %this) {
+entry:
+  %r_3 = alloca %t_h17, align 8
+  %p_src = alloca %t_i17, align 4
+  %p_src4 = addrspacecast ptr %p_src to ptr addrspace(4)
+  %call_2 = call spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef %p_src4, i32 noundef 7)
+  br label %l_body
+
+l_body:                                           ; preds = %l_body, %entry
+  %l_done = icmp eq i64 %arrayinit.cur.add_4, 34
+  br i1 %l_done, label %exit, label %l_body
+
+exit:                                             ; preds = %l_body
+  %0 = addrspacecast ptr %call_2 to ptr addrspace(4)
+  %call_6 = call spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef %0, i32 noundef 7)
+  br label %for.cond_3
+
+for.cond_3:                                       ; preds = %for.body_3, %exit
+  %lsr.iv1 = phi ptr [ %scevgep2, %for.body_3 ], [ %call_6, %exit ]
+  %lsr.iv = phi ptr [ %scevgep, %for.body_3 ], [ %r_3, %exit ]
+  %i.0_3 = phi i64 [ 0, %exit ], [ %inc_3, %for.body_3 ]
+  %cmp_3 = icmp ult i64 %i.0_3, 17
+  br i1 %cmp_3, label %for.body_3, label %exit2
+
+for.body_3:                                       ; preds = %for.cond_3
+  %call2_5 = call spir_func noundef half @_Z17__spirv_ocl_frexpDF16_PU3AS0i(half noundef %r1, ptr noundef %lsr.iv1)
+  store half %call2_5, ptr %lsr.iv, align 2
+  %inc_3 = add nuw nsw i64 %i.0_3, 1
+  %scevgep = getelementptr i8, ptr %lsr.iv, i64 2
+  %scevgep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+  br label %for.cond_3
+
+exit2:                                            ; preds = %for.cond_3
+  ret void
+}
+
+declare dso_local spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef, i32 noundef)
+declare dso_local spir_func noundef half @_Z17__spirv_ocl_frexpDF16_PU3AS0i(half noundef, ptr noundef)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll
index 54b2c786747768..2cba0f6ebd74be 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll
@@ -2,9 +2,7 @@
 ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
 
 ; CHECK-SPIRV-DAG: %[[#Char:]] = OpTypeInt 8 0
-; CHECK-SPIRV-DAG: %[[#GlobalCharPtr:]] = OpTypePointer CrossWorkgroup %[[#Char]]
 ; CHECK-SPIRV-DAG: %[[#LocalCharPtr:]] = OpTypePointer Workgroup %[[#Char]]
-; CHECK-SPIRV-DAG: %[[#PrivateCharPtr:]] = OpTypePointer Function %[[#Char]]
 ; CHECK-SPIRV-DAG: %[[#GenericCharPtr:]] = OpTypePointer Generic %[[#Char]]
 
 ; CHECK-SPIRV-DAG: %[[#Int:]] = OpTypeInt 32 0

>From 3dfd8e6a0999cf0e42e6acdc5e2b96fbe15ce90b Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Mon, 18 Nov 2024 07:50:54 -0800
Subject: [PATCH 02/17] add and use internal api call to create
 registers/assign types; fix v-reg type/class assignments

---
 llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp       | 10 ++--
 llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp   | 12 +----
 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp |  4 +-
 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h   |  2 +-
 llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp   |  4 +-
 llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp  |  5 +-
 llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp  |  9 +---
 llvm/lib/Target/SPIRV/SPIRVUtils.cpp          | 49 +++++++++++++++++++
 llvm/lib/Target/SPIRV/SPIRVUtils.h            | 16 ++++++
 9 files changed, 79 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 7f5794c63c711a..a1684b87722cb2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -469,12 +469,8 @@ static Register buildLoadInst(SPIRVType *BaseType, Register PtrRegister,
                               MachineIRBuilder &MIRBuilder,
                               SPIRVGlobalRegistry *GR, LLT LowLevelType,
                               Register DestinationReg = Register(0)) {
-  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
-  if (!DestinationReg.isValid()) {
-    DestinationReg = MRI->createVirtualRegister(&SPIRV::iIDRegClass);
-    MRI->setType(DestinationReg, LLT::scalar(64));
-    GR->assignSPIRVTypeToVReg(BaseType, DestinationReg, MIRBuilder.getMF());
-  }
+  if (!DestinationReg.isValid())
+    DestinationReg = createVirtualRegister(BaseType, GR, MIRBuilder);
   // TODO: consider using correct address space and alignment (p0 is canonical
   // type for selection though).
   MachinePointerInfo PtrInfo = MachinePointerInfo();
@@ -2151,7 +2147,7 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call,
     const SPIRVType *PointerSizeTy = GR->getOrCreateSPIRVPointerType(
         Int32Ty, MIRBuilder, SPIRV::StorageClass::Function);
     for (unsigned I = 0; I < LocalSizeNum; ++I) {
-      Register Reg = MRI->createVirtualRegister(&SPIRV::iIDRegClass);
+      Register Reg = MRI->createVirtualRegister(&SPIRV::pIDRegClass);
       MRI->setType(Reg, LLType);
       GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF());
       auto GEPInst = MIRBuilder.buildIntrinsic(
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index a7b6b0efa99551..3fdaa6aa3257ea 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -551,18 +551,10 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
               RetTy =
                   TypedPointerType::get(ElemTy, PtrRetTy->getAddressSpace());
         }
-        SPIRVType *SpvType = GR->getOrCreateSPIRVType(RetTy, MIRBuilder);
-        GR->assignSPIRVTypeToVReg(SpvType, ResVReg, MF);
-        if (!MRI->getRegClassOrNull(ResVReg)) {
-          MRI->setRegClass(ResVReg, GR->getRegClass(SpvType));
-          MRI->setType(ResVReg, GR->getRegType(SpvType));
-        }
+        setRegClassType(ResVReg, RetTy, GR, MIRBuilder);
       }
     } else {
-      SPIRVType *SpvType = GR->getOrCreateSPIRVType(OrigRetTy, MIRBuilder);
-      ResVReg = MRI->createVirtualRegister(GR->getRegClass(SpvType));
-      MRI->setType(ResVReg, GR->getRegType(SpvType));
-      GR->assignSPIRVTypeToVReg(SpvType, ResVReg, MF);
+      ResVReg = createVirtualRegister(OrigRetTy, GR, MIRBuilder);
     }
     SmallVector<Register, 8> ArgVRegs;
     for (auto Arg : Info.OrigArgs) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 6f222883ee07de..4e539fcd6c9999 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -69,7 +69,7 @@ SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg(
 
 void SPIRVGlobalRegistry::assignSPIRVTypeToVReg(SPIRVType *SpirvType,
                                                 Register VReg,
-                                                MachineFunction &MF) {
+                                                const MachineFunction &MF) {
   VRegToTypeMap[&MF][VReg] = SpirvType;
 }
 
@@ -578,7 +578,7 @@ SPIRVGlobalRegistry::getOrCreateConstNullPtr(MachineIRBuilder &MIRBuilder,
   if (!Res.isValid()) {
     LLT LLTy = LLT::pointer(LLVMPtrTy->getAddressSpace(), PointerSize);
     Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
-    CurMF->getRegInfo().setRegClass(Res, &SPIRV::iIDRegClass);
+    CurMF->getRegInfo().setRegClass(Res, &SPIRV::pIDRegClass);
     assignSPIRVTypeToVReg(SpvType, Res, *CurMF);
     MIRBuilder.buildInstr(SPIRV::OpConstantNull)
         .addDef(Res)
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 3bb86e8be69500..ff4b0ea8757fa4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -330,7 +330,7 @@ class SPIRVGlobalRegistry {
   // In cases where the SPIR-V type is already known, this function can be
   // used to map it to the given VReg via an ASSIGN_TYPE instruction.
   void assignSPIRVTypeToVReg(SPIRVType *Type, Register VReg,
-                             MachineFunction &MF);
+                             const MachineFunction &MF);
 
   // Either generate a new OpTypeXXX instruction or return an existing one
   // corresponding to the given LLVM IR type.
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index 59a1bf50b771b9..b53ea1f7edf4a0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -111,8 +111,8 @@ static void doInsertBitcast(const SPIRVSubtarget &STI, MachineRegisterInfo *MRI,
                             SPIRVGlobalRegistry &GR, MachineInstr &I,
                             Register OpReg, unsigned OpIdx,
                             SPIRVType *NewPtrType) {
-  Register NewReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
   MachineIRBuilder MIB(I);
+  Register NewReg = createVirtualRegister(NewPtrType, &GR, MRI, MIB.getMF());
   bool Res = MIB.buildInstr(SPIRV::OpBitcast)
                  .addDef(NewReg)
                  .addUse(GR.getSPIRVTypeID(NewPtrType))
@@ -121,8 +121,6 @@ static void doInsertBitcast(const SPIRVSubtarget &STI, MachineRegisterInfo *MRI,
                                    *STI.getRegBankInfo());
   if (!Res)
     report_fatal_error("insert validation bitcast: cannot constrain all uses");
-  MRI->setRegClass(NewReg, &SPIRV::iIDRegClass);
-  GR.assignSPIRVTypeToVReg(NewPtrType, NewReg, MIB.getMF());
   I.getOperand(OpIdx).setReg(NewReg);
 }
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index 460f0127d4ffcd..bd04b8c1c0b333 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -357,12 +357,13 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
   verify(*ST.getInstrInfo());
 }
 
-static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType,
+static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpvType,
                                 LegalizerHelper &Helper,
                                 MachineRegisterInfo &MRI,
                                 SPIRVGlobalRegistry *GR) {
   Register ConvReg = MRI.createGenericVirtualRegister(ConvTy);
-  GR->assignSPIRVTypeToVReg(SpirvType, ConvReg, Helper.MIRBuilder.getMF());
+  MRI.setRegClass(ConvReg, GR->getRegClass(SpvType));
+  GR->assignSPIRVTypeToVReg(SpvType, ConvReg, Helper.MIRBuilder.getMF());
   Helper.MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
       .addDef(ConvReg)
       .addUse(Reg);
diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
index 11b9e4f6f6d17b..3373d8e24dab48 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
@@ -102,10 +102,7 @@ static void processNewInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
           if (!ResType) {
             // There was no "assign type" actions, let's fix this now
             ResType = ScalarType;
-            MRI.setRegClass(ResVReg, &SPIRV::iIDRegClass);
-            MRI.setType(ResVReg,
-                        LLT::scalar(GR->getScalarOrVectorBitWidth(ResType)));
-            GR->assignSPIRVTypeToVReg(ResType, ResVReg, *GR->CurMF);
+            setRegClassType(ResVReg, ResType, GR, &MRI, *GR->CurMF, true);
           }
         }
       } else if (mayBeInserted(Opcode) && I.getNumDefs() == 1 &&
@@ -124,9 +121,7 @@ static void processNewInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
           if (!ResVType)
             continue;
           // Set type & class
-          MRI.setRegClass(ResVReg, GR->getRegClass(ResVType));
-          MRI.setType(ResVReg, GR->getRegType(ResVType));
-          GR->assignSPIRVTypeToVReg(ResVType, ResVReg, *GR->CurMF);
+          setRegClassType(ResVReg, ResVType, GR, &MRI, *GR->CurMF, true);
         }
         // If this is a simple operation that is to be reduced by TableGen
         // definition we must apply some of pre-legalizer rules here
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index ad8dfa0e8811b7..c5bf02f1a4bca3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -13,6 +13,7 @@
 #include "SPIRVUtils.h"
 #include "MCTargetDesc/SPIRVBaseInfo.h"
 #include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
 #include "SPIRVInstrInfo.h"
 #include "SPIRVSubtarget.h"
 #include "llvm/ADT/StringRef.h"
@@ -679,4 +680,52 @@ bool getVacantFunctionName(Module &M, std::string &Name) {
   return false;
 }
 
+// Assign SPIR-V type to the register. If the register has no valid assigned
+// class (or Force is set), set register LLT type and class per the SPIR-V type.
+void setRegClassType(Register Reg, SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+                     MachineRegisterInfo *MRI, const MachineFunction &MF,
+                     bool Force) {
+  GR->assignSPIRVTypeToVReg(SpvType, Reg, MF);
+  if (!MRI->getRegClassOrNull(Reg) || Force) {
+    MRI->setRegClass(Reg, GR->getRegClass(SpvType));
+    MRI->setType(Reg, GR->getRegType(SpvType));
+  }
+}
+
+// Get or create the SPIR-V type for Ty and assign it to the register. If the
+// register has no valid assigned class (or Force is set), set register LLT
+// type and class according to the SPIR-V type.
+void setRegClassType(Register Reg, const Type *Ty, SPIRVGlobalRegistry *GR,
+                     MachineIRBuilder &MIRBuilder, bool Force) {
+  setRegClassType(Reg, GR->getOrCreateSPIRVType(Ty, MIRBuilder), GR,
+                  MIRBuilder.getMRI(), MIRBuilder.getMF(), Force);
+}
+
+// Create a virtual register and assign SPIR-V type to the register. Set
+// register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+                               MachineRegisterInfo *MRI,
+                               const MachineFunction &MF) {
+  Register Reg = MRI->createVirtualRegister(GR->getRegClass(SpvType));
+  MRI->setType(Reg, GR->getRegType(SpvType));
+  GR->assignSPIRVTypeToVReg(SpvType, Reg, MF);
+  return Reg;
+}
+
+// Create a virtual register and assign SPIR-V type to the register. Set
+// register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+                               MachineIRBuilder &MIRBuilder) {
+  return createVirtualRegister(SpvType, GR, MIRBuilder.getMRI(),
+                               MIRBuilder.getMF());
+}
+
+// Create a SPIR-V type, virtual register and assign SPIR-V type to the
+// register. Set register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR,
+                               MachineIRBuilder &MIRBuilder) {
+  return createVirtualRegister(GR->getOrCreateSPIRVType(Ty, MIRBuilder), GR,
+                               MIRBuilder);
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index da0e8769cac1b6..5ed2303f4e3d3b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -34,6 +34,7 @@ class Register;
 class StringRef;
 class SPIRVInstrInfo;
 class SPIRVSubtarget;
+class SPIRVGlobalRegistry;
 
 // This class implements a partial ordering visitor, which visits a cyclic graph
 // in natural topological-like ordering. Topological ordering is not defined for
@@ -357,5 +358,20 @@ MachineInstr *getVRegDef(MachineRegisterInfo &MRI, Register Reg);
 #define SPIRV_BACKEND_SERVICE_FUN_NAME "__spirv_backend_service_fun"
 bool getVacantFunctionName(Module &M, std::string &Name);
 
+void setRegClassType(Register Reg, const Type *Ty, SPIRVGlobalRegistry *GR,
+                     MachineIRBuilder &MIRBuilder, bool Force = false);
+void setRegClassType(Register Reg, const MachineInstr *SpvType,
+                     SPIRVGlobalRegistry *GR, MachineRegisterInfo *MRI,
+                     const MachineFunction &MF, bool Force = false);
+Register createVirtualRegister(const MachineInstr *SpvType,
+                               SPIRVGlobalRegistry *GR,
+                               MachineRegisterInfo *MRI,
+                               const MachineFunction &MF);
+Register createVirtualRegister(const MachineInstr *SpvType,
+                               SPIRVGlobalRegistry *GR,
+                               MachineIRBuilder &MIRBuilder);
+Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR,
+                               MachineIRBuilder &MIRBuilder);
+
 } // namespace llvm
 #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H

>From aa685dda5917a1eee69713b4838e615f25fc6af6 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Tue, 19 Nov 2024 12:55:15 -0800
Subject: [PATCH 03/17] improve type inference: change processing order,
 calculate incomplete types, speed up postprocessing of types

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 164 ++++++++++++------
 .../fp_two_calls.ll                           |  12 +-
 .../CodeGen/SPIRV/pointers/phi-chain-types.ll |  82 +++++++++
 3 files changed, 200 insertions(+), 58 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index e6ef40e010dc20..c98c22641273ec 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -78,6 +78,11 @@ class SPIRVEmitIntrinsics
   // a register of Instructions that don't have a complete type definition
   DenseMap<Value *, unsigned> UncompleteTypeInfo;
   SmallVector<Value *> PostprocessWorklist;
+  void addToUncompleteTypeInfo(Value *Op) {
+    auto It = UncompleteTypeInfo.try_emplace(Op, PostprocessWorklist.size());
+    if (It.second)
+      PostprocessWorklist.push_back(Op);
+  }
 
   // well known result types of builtins
   enum WellKnownTypes { Event };
@@ -105,8 +110,9 @@ class SPIRVEmitIntrinsics
                                bool UnknownElemTypeI8);
 
   // deduce Types of operands of the Instruction if possible
-  void deduceOperandElementType(Instruction *I, Instruction *AskOp = 0,
-                                Type *AskTy = 0, CallInst *AssignCI = 0);
+  void deduceOperandElementType(Instruction *I,
+                                const SmallPtrSet<Value *, 4> *AskOps = nullptr,
+                                SmallPtrSet<Value *, 16> *Completed = nullptr);
 
   void preprocessCompositeConstants(IRBuilder<> &B);
   void preprocessUndefs(IRBuilder<> &B);
@@ -145,12 +151,20 @@ class SPIRVEmitIntrinsics
   Type *deduceFunParamElementType(Function *F, unsigned OpIdx);
   Type *deduceFunParamElementType(Function *F, unsigned OpIdx,
                                   std::unordered_set<Function *> &FVisited);
+
+  bool deduceOperandElementTypeCalledFunction(
+      SPIRV::InstructionSet::InstructionSet InstrSet, CallInst *CI,
+      SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy);
+  void deduceOperandElementTypeFunctionPointer(
+      CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+      Type *&KnownElemTy, SmallPtrSet<Value *, 16> *Completed);
+
   void replaceWithPtrcasted(Instruction *CI, Type *NewElemTy, Type *KnownElemTy,
                             CallInst *AssignCI);
   void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true);
 
   bool runOnFunction(Function &F);
-  bool postprocessTypes();
+  bool postprocessTypes(Module &M);
   bool processFunctionPointers(Module &M);
 
 public:
@@ -286,11 +300,11 @@ void SPIRVEmitIntrinsics::replaceAllUsesWith(Value *Src, Value *Dest,
   if (DeleteOld) {
     unsigned Pos = It->second;
     UncompleteTypeInfo.erase(Src);
-    UncompleteTypeInfo[Dest] = Pos;
-    PostprocessWorklist[Pos] = Dest;
+    auto It = UncompleteTypeInfo.try_emplace(Dest, Pos);
+    if (It.second)
+      PostprocessWorklist[Pos] = Dest;
   } else {
-    UncompleteTypeInfo[Dest] = PostprocessWorklist.size();
-    PostprocessWorklist.push_back(Dest);
+    addToUncompleteTypeInfo(Dest);
   }
 }
 
@@ -455,10 +469,7 @@ void SPIRVEmitIntrinsics::maybeAssignPtrType(Type *&Ty, Value *Op, Type *RefTy,
   if (isUntypedPointerTy(RefTy)) {
     if (!UnknownElemTypeI8)
       return;
-    if (auto *I = dyn_cast<Instruction>(Op)) {
-      UncompleteTypeInfo[I] = PostprocessWorklist.size();
-      PostprocessWorklist.push_back(I);
-    }
+    addToUncompleteTypeInfo(Op);
   }
   Ty = RefTy;
 }
@@ -661,10 +672,7 @@ Type *SPIRVEmitIntrinsics::deduceElementType(Value *I, bool UnknownElemTypeI8) {
     return Ty;
   if (!UnknownElemTypeI8)
     return nullptr;
-  if (auto *Instr = dyn_cast<Instruction>(I)) {
-    UncompleteTypeInfo[Instr] = PostprocessWorklist.size();
-    PostprocessWorklist.push_back(Instr);
-  }
+  addToUncompleteTypeInfo(I);
   return IntegerType::getInt8Ty(I->getContext());
 }
 
@@ -683,8 +691,7 @@ static inline Type *getAtomicElemTy(SPIRVGlobalRegistry *GR, Instruction *I,
 
 // Try to deduce element type for a call base. Returns false if this is an
 // indirect function invocation, and true otherwise.
-static bool deduceOperandElementTypeCalledFunction(
-    SPIRVGlobalRegistry *GR, Instruction *I,
+bool SPIRVEmitIntrinsics::deduceOperandElementTypeCalledFunction(
     SPIRV::InstructionSet::InstructionSet InstrSet, CallInst *CI,
     SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy) {
   Function *CalledF = CI->getCalledFunction();
@@ -726,7 +733,7 @@ static bool deduceOperandElementTypeCalledFunction(
       case SPIRV::OpAtomicUMax:
       case SPIRV::OpAtomicSMin:
       case SPIRV::OpAtomicSMax: {
-        KnownElemTy = getAtomicElemTy(GR, I, Op);
+        KnownElemTy = getAtomicElemTy(GR, CI, Op);
         if (!KnownElemTy)
           return true;
         Ops.push_back(std::make_pair(Op, 0));
@@ -738,32 +745,44 @@ static bool deduceOperandElementTypeCalledFunction(
 }
 
 // Try to deduce element type for a function pointer.
-static void deduceOperandElementTypeFunctionPointer(
-    SPIRVGlobalRegistry *GR, Instruction *I, CallInst *CI,
-    SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy) {
+void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
+    CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+    Type *&KnownElemTy, SmallPtrSet<Value *, 16> *Completed) {
   Value *Op = CI->getCalledOperand();
   if (!Op || !isPointerTy(Op->getType()))
     return;
   Ops.push_back(std::make_pair(Op, std::numeric_limits<unsigned>::max()));
   FunctionType *FTy = CI->getFunctionType();
-  bool IsNewFTy = false;
+  bool IsNewFTy = false, IsUncomplete = false;
   SmallVector<Type *, 4> ArgTys;
   for (Value *Arg : CI->args()) {
     Type *ArgTy = Arg->getType();
-    if (ArgTy->isPointerTy())
+    if (ArgTy->isPointerTy()) {
       if (Type *ElemTy = GR->findDeducedElementType(Arg)) {
         IsNewFTy = true;
         ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy));
+        if (UncompleteTypeInfo.contains(Arg))
+          IsUncomplete = true;
+      } else {
+        IsUncomplete = true;
       }
+    }
     ArgTys.push_back(ArgTy);
   }
   Type *RetTy = FTy->getReturnType();
-  if (I->getType()->isPointerTy())
-    if (Type *ElemTy = GR->findDeducedElementType(I)) {
+  if (CI->getType()->isPointerTy()) {
+    if (Type *ElemTy = GR->findDeducedElementType(CI)) {
       IsNewFTy = true;
       RetTy =
-          TypedPointerType::get(ElemTy, getPointerAddressSpace(I->getType()));
+          TypedPointerType::get(ElemTy, getPointerAddressSpace(CI->getType()));
+      if (UncompleteTypeInfo.contains(CI))
+        IsUncomplete = true;
+    } else {
+      IsUncomplete = true;
     }
+  }
+  if (!Completed && IsUncomplete)
+    addToUncompleteTypeInfo(Op);
   KnownElemTy =
       IsNewFTy ? FunctionType::get(RetTy, ArgTys, FTy->isVarArg()) : FTy;
 }
@@ -772,10 +791,9 @@ static void deduceOperandElementTypeFunctionPointer(
 // tries to deduce them. If the Instruction has Pointer operands with known
 // types which differ from expected, this function tries to insert a bitcast to
 // resolve the issue.
-void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
-                                                   Instruction *AskOp,
-                                                   Type *AskTy,
-                                                   CallInst *AskCI) {
+void SPIRVEmitIntrinsics::deduceOperandElementType(
+    Instruction *I, const SmallPtrSet<Value *, 4> *AskOps,
+    SmallPtrSet<Value *, 16> *Completed) {
   SmallVector<std::pair<Value *, unsigned>> Ops;
   Type *KnownElemTy = nullptr;
   // look for known basic patterns of type inference
@@ -875,10 +893,9 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
     }
   } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
     if (!CI->isIndirectCall())
-      deduceOperandElementTypeCalledFunction(GR, I, InstrSet, CI, Ops,
-                                             KnownElemTy);
+      deduceOperandElementTypeCalledFunction(InstrSet, CI, Ops, KnownElemTy);
     else if (HaveFunPtrs)
-      deduceOperandElementTypeFunctionPointer(GR, I, CI, Ops, KnownElemTy);
+      deduceOperandElementTypeFunctionPointer(CI, Ops, KnownElemTy, Completed);
   }
 
   // There is no enough info to deduce types or all is valid.
@@ -889,9 +906,19 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
   IRBuilder<> B(Ctx);
   for (auto &OpIt : Ops) {
     Value *Op = OpIt.first;
-    if (Op->use_empty() || (AskOp && Op != AskOp))
+    if (Op->use_empty())
       continue;
-    Type *Ty = AskOp ? AskTy : GR->findDeducedElementType(Op);
+    Type *AskTy = nullptr;
+    CallInst *AskCI = nullptr;
+    if (AskOps) {
+      auto It = AskOps->find(Op);
+      if (It == AskOps->end())
+        continue;
+      AskTy = GR->findDeducedElementType(Op);
+      AskCI = GR->findAssignPtrTypeInstr(Op);
+      assert(AskTy && AskCI);
+    }
+    Type *Ty = AskTy ? AskTy : GR->findDeducedElementType(Op);
     if (Ty == KnownElemTy)
       continue;
     Value *OpTyVal = PoisonValue::get(KnownElemTy);
@@ -899,6 +926,9 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
     if (!Ty || AskTy || isUntypedPointerTy(Ty) ||
         UncompleteTypeInfo.contains(Op)) {
       GR->addDeducedElementType(Op, KnownElemTy);
+      // check if KnownElemTy is complete
+      if (!Completed && UncompleteTypeInfo.contains(I))
+        addToUncompleteTypeInfo(Op);
       // check if there is existing Intrinsic::spv_assign_ptr_type instruction
       CallInst *AssignCI = AskCI ? AskCI : GR->findAssignPtrTypeInstr(Op);
       if (AssignCI == nullptr) {
@@ -910,6 +940,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
         GR->addAssignPtrTypeInstr(Op, CI);
       } else {
         updateAssignType(AssignCI, Op, OpTyVal);
+        if (Completed)
+          Completed->insert(Op);
       }
     } else {
       if (auto *OpI = dyn_cast<Instruction>(Op)) {
@@ -1878,6 +1910,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   for (auto &I : instructions(Func))
     Worklist.push_back(&I);
 
+  // Pass forward: use operand to deduce instructions result.
   for (auto &I : Worklist) {
     // Don't emit intrinsincs for convergence intrinsics.
     if (isConvergenceIntrinsic(I))
@@ -1894,9 +1927,17 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
       insertAssignPtrTypeIntrs(I, B, true);
   }
 
-  for (auto &I : instructions(Func))
+  // Pass backward: use instructions results to specify/update/cast operands
+  // where needed.
+  for (auto &I : llvm::reverse(instructions(Func)))
     deduceOperandElementType(&I);
 
+  // Pass forward for PHIs only, their operands are not preceed the instruction
+  // in meaning of `instructions(Func)`.
+  for (BasicBlock &BB : Func)
+    for (PHINode &Phi : BB.phis())
+      deduceOperandElementType(&Phi);
+
   for (auto *I : Worklist) {
     TrackConstants = true;
     if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
@@ -1938,16 +1979,19 @@ void SPIRVEmitIntrinsics::replaceWithPtrcasted(Instruction *CI, Type *NewElemTy,
 }
 
 // Try to deduce a better type for pointers to untyped ptr.
-bool SPIRVEmitIntrinsics::postprocessTypes() {
-  bool Changed = false;
-  if (!GR)
-    return Changed;
+bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
+  if (!GR || UncompleteTypeInfo.size() == 0)
+    return false;
+
+  DenseMap<Value *, SmallPtrSet<Value *, 4>> ToProcess;
+  SmallPtrSet<Value *, 16> Completed;
   for (auto IB = PostprocessWorklist.rbegin(), IE = PostprocessWorklist.rend();
        IB != IE; ++IB) {
     CallInst *AssignCI = GR->findAssignPtrTypeInstr(*IB);
     Type *KnownTy = GR->findDeducedElementType(*IB);
-    if (!KnownTy || !AssignCI || !isa<Instruction>(AssignCI->getArgOperand(0)))
+    if (!KnownTy || !AssignCI)
       continue;
+    assert(AssignCI->getArgOperand(0) == *IB);
     // Try to improve the type deduced after all Functions are processed.
     if (auto *CI = dyn_cast<CallInst>(*IB)) {
       if (Function *CalledF = CI->getCalledFunction()) {
@@ -1955,24 +1999,37 @@ bool SPIRVEmitIntrinsics::postprocessTypes() {
         // Fix inconsistency between known type and function's return type.
         if (RetElemTy && RetElemTy != KnownTy) {
           replaceWithPtrcasted(CI, RetElemTy, KnownTy, AssignCI);
-          Changed = true;
+          Completed.insert(CI);
           continue;
         }
       }
     }
-    Instruction *I = cast<Instruction>(AssignCI->getArgOperand(0));
-    for (User *U : I->users()) {
+    Value *Op = AssignCI->getArgOperand(0);
+    for (User *U : Op->users()) {
       Instruction *Inst = dyn_cast<Instruction>(U);
-      if (!Inst || isa<IntrinsicInst>(Inst))
+      if (Inst && !isa<IntrinsicInst>(Inst))
+        ToProcess[Inst].insert(Op);
+    }
+  }
+  if (Completed.size() >= UncompleteTypeInfo.size())
+    return true;
+
+  for (auto &F : M) {
+    for (auto &I : llvm::reverse(instructions(F))) {
+      auto It = ToProcess.find(&I);
+      if (It == ToProcess.end())
         continue;
-      deduceOperandElementType(Inst, I, KnownTy, AssignCI);
-      if (KnownTy != GR->findDeducedElementType(I)) {
-        Changed = true;
-        break;
-      }
+      It->second.remove_if(
+          [&Completed](Value *V) { return Completed.contains(V); });
+      if (It->second.size() == 0)
+        continue;
+      deduceOperandElementType(&I, &It->second, &Completed);
+      if (Completed.size() >= UncompleteTypeInfo.size())
+        return true;
     }
   }
-  return Changed;
+
+  return Completed.size() > 0;
 }
 
 bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
@@ -1983,17 +2040,16 @@ bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
   for (auto &F : M)
     Changed |= runOnFunction(F);
 
+  // Specify function parameters after all functions were processed.
   for (auto &F : M) {
     // check if function parameter types are set
     if (!F.isDeclaration() && !F.isIntrinsic()) {
-      const SPIRVSubtarget &ST = TM->getSubtarget<SPIRVSubtarget>(F);
-      GR = ST.getSPIRVGlobalRegistry();
       IRBuilder<> B(F.getContext());
       processParamTypes(&F, B);
     }
   }
 
-  Changed |= postprocessTypes();
+  Changed |= postprocessTypes(M);
   if (HaveFunPtrs)
     Changed |= processFunctionPointers(M);
 
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
index eb7b1dffaee501..621d06aa4aadee 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s
 ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
 
 ; CHECK-DAG: OpCapability Int8
@@ -15,10 +15,14 @@
 ; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0
 ; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0
 ; CHECK-DAG: %[[TyPtrInt8:.*]] = OpTypePointer Function %[[TyInt8]]
-; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrInt8]]
-; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]]
-; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrFp]] %[[TyPtrInt8]]
+; CHECK-DAG: %[[TyUncompleteBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrInt8]] %[[TyPtrInt8]]
+; CHECK-DAG: %[[TyPtrUncompleteBar:.*]] = OpTypePointer Function %[[TyUncompleteBar]]
+; CHECK-DAG: %[[TyUncompleteFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrUncompleteBar]]
+; CHECK-DAG: %[[TyPtrUncompleteFp:.*]] = OpTypePointer Function %[[TyUncompleteFp]]
+; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrUncompleteFp]] %[[TyPtrInt8]]
 ; CHECK-DAG: %[[TyPtrBar:.*]] = OpTypePointer Function %[[TyBar]]
+; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrBar]]
+; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]]
 ; CHECK-DAG: %[[TyTest:.*]] = OpTypeFunction %[[TyVoid]] %[[TyPtrFp]] %[[TyPtrInt8]] %[[TyPtrBar]]
 ; CHECK: %[[test]] = OpFunction %[[TyVoid]] None %[[TyTest]]
 ; CHECK: %[[fp]] = OpFunctionParameter %[[TyPtrFp]]
diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll
new file mode 100644
index 00000000000000..a9e79df259c4fb
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll
@@ -0,0 +1,82 @@
+; The goal of the test case is to ensure that correct types are applied to PHI's as arguments of other PHI's.
+; Pass criterion is that spirv-val considers output valid.
+
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+
+; CHECK-DAG: OpName %[[#Foo:]] "foo"
+; CHECK-DAG: OpName %[[#FooVal1:]] "val1"
+; CHECK-DAG: OpName %[[#FooVal2:]] "val2"
+; CHECK-DAG: OpName %[[#FooVal3:]] "val3"
+; CHECK-DAG: OpName %[[#Bar:]] "bar"
+; CHECK-DAG: OpName %[[#BarVal1:]] "val1"
+; CHECK-DAG: OpName %[[#BarVal2:]] "val2"
+; CHECK-DAG: OpName %[[#BarVal3:]] "val3"
+
+; CHECK-DAG: %[[#Short:]] = OpTypeInt 16 0
+; CHECK-DAG: %[[#ShortGenPtr:]] = OpTypePointer Generic %[[#Short]]
+; CHECK-DAG: %[[#ShortWrkPtr:]] = OpTypePointer Workgroup %[[#Short]]
+; CHECK-DAG: %[[#G1:]] = OpVariable %[[#ShortWrkPtr]] Workgroup
+
+; CHECK: %[[#Foo:]] = OpFunction %[[#]] None %[[#]]
+; CHECK: %[[#FooArgP:]] = OpFunctionParameter %[[#ShortGenPtr]]
+; CHECK: OpFunctionParameter
+; CHECK: OpFunctionParameter
+; CHECK: OpFunctionParameter
+; CHECK: %[[#FooG1:]] = OpPtrCastToGeneric %[[#ShortGenPtr]] %[[#G1]]
+; CHECK: %[[#FooVal2]] = OpPhi %[[#ShortGenPtr]] %[[#FooArgP]] %[[#]] %[[#FooVal3]] %[[#]]
+; CHECK: %[[#FooVal1]] = OpPhi %[[#ShortGenPtr]] %[[#FooG1]] %[[#]] %[[#FooVal2]] %[[#]]
+; CHECK: %[[#FooVal3]] = OpLoad %[[#ShortGenPtr]] %[[#]]
+
+; CHECK: %[[#Bar:]] = OpFunction %[[#]] None %[[#]]
+; CHECK: %[[#BarArgP:]] = OpFunctionParameter %[[#ShortGenPtr]]
+; CHECK: OpFunctionParameter
+; CHECK: OpFunctionParameter
+; CHECK: OpFunctionParameter
+; CHECK: %[[#BarVal3]] = OpLoad %[[#ShortGenPtr]] %[[#]]
+; CHECK: %[[#BarG1:]] = OpPtrCastToGeneric %[[#ShortGenPtr]] %[[#G1]]
+; CHECK: %[[#BarVal1]] = OpPhi %[[#ShortGenPtr]] %[[#BarG1]] %[[#]] %[[#BarVal2]] %[[#]]
+; CHECK: %[[#BarVal2]] = OpPhi %[[#ShortGenPtr]] %[[#BarArgP]] %[[#]] %[[#BarVal3]] %[[#]]
+
+@G1 = internal addrspace(3) global i16 undef, align 8
+@G2 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+
+define spir_kernel void @foo(ptr addrspace(4) %p, i1 %f1, i1 %f2, i1 %f3) {
+entry:
+  br label %l1
+
+l1:
+  br i1 %f1, label %l2, label %exit
+
+l2:
+  %val2 = phi ptr addrspace(4) [ %p, %l1 ], [ %val3, %l3 ]
+  %val1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @G1 to ptr addrspace(4)), %l1 ], [ %val2, %l3 ]
+  br i1 %f2, label %l3, label %exit
+
+l3:
+  %val3 = load ptr addrspace(4), ptr addrspace(3) @G2, align 8
+  br i1 %f3, label %l2, label %exit
+
+exit:
+  ret void
+}
+
+define spir_kernel void @bar(ptr addrspace(4) %p, i1 %f1, i1 %f2, i1 %f3) {
+entry:
+  %val3 = load ptr addrspace(4), ptr addrspace(3) @G2, align 8
+  br label %l1
+
+l3:
+  br i1 %f3, label %l2, label %exit
+
+l1:
+  br i1 %f1, label %l2, label %exit
+
+l2:
+  %val1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @G1 to ptr addrspace(4)), %l1 ], [ %val2, %l3 ]
+  %val2 = phi ptr addrspace(4) [ %p, %l1 ], [ %val3, %l3 ]
+  br i1 %f2, label %l3, label %exit
+
+exit:
+  ret void
+}

>From c0f764c163ad0c75e85c32b65d9f1c8e948a4d86 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 20 Nov 2024 03:48:44 -0800
Subject: [PATCH 04/17] rework uncomplete types

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 207 +++++++++++-------
 1 file changed, 129 insertions(+), 78 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index c98c22641273ec..0625d4c1469ed2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -67,7 +67,7 @@ class SPIRVEmitIntrinsics
       public InstVisitor<SPIRVEmitIntrinsics, Instruction *> {
   SPIRVTargetMachine *TM = nullptr;
   SPIRVGlobalRegistry *GR = nullptr;
-  Function *F = nullptr;
+  Function *CurrF = nullptr;
   bool TrackConstants = true;
   bool HaveFunPtrs = false;
   DenseMap<Instruction *, Constant *> AggrConsts;
@@ -76,12 +76,27 @@ class SPIRVEmitIntrinsics
   SPIRV::InstructionSet::InstructionSet InstrSet;
 
   // a register of Instructions that don't have a complete type definition
-  DenseMap<Value *, unsigned> UncompleteTypeInfo;
-  SmallVector<Value *> PostprocessWorklist;
-  void addToUncompleteTypeInfo(Value *Op) {
-    auto It = UncompleteTypeInfo.try_emplace(Op, PostprocessWorklist.size());
-    if (It.second)
-      PostprocessWorklist.push_back(Op);
+  bool CanTodoType = true;
+  bool CanUpdateType = true;
+  unsigned TodoTypeSz = 0;
+  DenseMap<Value *, bool> TodoType;
+  void insertTodoType(Value *Op) {
+    if (CanTodoType) {
+      auto It = TodoType.try_emplace(Op, true);
+      if (It.second)
+        ++TodoTypeSz;
+    }
+  }
+  void eraseTodoType(Value *Op) {
+    auto It = TodoType.find(Op);
+    if (It != TodoType.end() && It->second) {
+      TodoType[Op] = false;
+      --TodoTypeSz;
+    }
+  }
+  bool isTodoType(Value *Op) {
+    auto It = TodoType.find(Op);
+    return It != TodoType.end() && It->second;
   }
 
   // well known result types of builtins
@@ -112,7 +127,7 @@ class SPIRVEmitIntrinsics
   // deduce Types of operands of the Instruction if possible
   void deduceOperandElementType(Instruction *I,
                                 const SmallPtrSet<Value *, 4> *AskOps = nullptr,
-                                SmallPtrSet<Value *, 16> *Completed = nullptr);
+                                bool IsPostprocessing = false);
 
   void preprocessCompositeConstants(IRBuilder<> &B);
   void preprocessUndefs(IRBuilder<> &B);
@@ -157,7 +172,7 @@ class SPIRVEmitIntrinsics
       SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy);
   void deduceOperandElementTypeFunctionPointer(
       CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
-      Type *&KnownElemTy, SmallPtrSet<Value *, 16> *Completed);
+      Type *&KnownElemTy, bool IsPostprocessing);
 
   void replaceWithPtrcasted(Instruction *CI, Type *NewElemTy, Type *KnownElemTy,
                             CallInst *AssignCI);
@@ -294,17 +309,10 @@ void SPIRVEmitIntrinsics::replaceAllUsesWith(Value *Src, Value *Dest,
   GR->updateIfExistDeducedElementType(Src, Dest, DeleteOld);
   GR->updateIfExistAssignPtrTypeInstr(Src, Dest, DeleteOld);
   // Update uncomplete type records if any
-  auto It = UncompleteTypeInfo.find(Src);
-  if (It == UncompleteTypeInfo.end())
-    return;
-  if (DeleteOld) {
-    unsigned Pos = It->second;
-    UncompleteTypeInfo.erase(Src);
-    auto It = UncompleteTypeInfo.try_emplace(Dest, Pos);
-    if (It.second)
-      PostprocessWorklist[Pos] = Dest;
-  } else {
-    addToUncompleteTypeInfo(Dest);
+  if (isTodoType(Src)) {
+    if (DeleteOld)
+      eraseTodoType(Src);
+    insertTodoType(Dest);
   }
 }
 
@@ -368,7 +376,7 @@ void SPIRVEmitIntrinsics::buildAssignPtr(IRBuilder<> &B, Type *ElemTy,
   Value *OfType = PoisonValue::get(ElemTy);
   CallInst *AssignPtrTyCI = GR->findAssignPtrTypeInstr(Arg);
   if (AssignPtrTyCI == nullptr ||
-      AssignPtrTyCI->getParent()->getParent() != F) {
+      AssignPtrTyCI->getParent()->getParent() != CurrF) {
     AssignPtrTyCI = buildIntrWithMD(
         Intrinsic::spv_assign_ptr_type, {Arg->getType()}, OfType, Arg,
         {B.getInt32(getPointerAddressSpace(Arg->getType()))}, B);
@@ -469,7 +477,7 @@ void SPIRVEmitIntrinsics::maybeAssignPtrType(Type *&Ty, Value *Op, Type *RefTy,
   if (isUntypedPointerTy(RefTy)) {
     if (!UnknownElemTypeI8)
       return;
-    addToUncompleteTypeInfo(Op);
+    insertTodoType(Op);
   }
   Ty = RefTy;
 }
@@ -672,7 +680,7 @@ Type *SPIRVEmitIntrinsics::deduceElementType(Value *I, bool UnknownElemTypeI8) {
     return Ty;
   if (!UnknownElemTypeI8)
     return nullptr;
-  addToUncompleteTypeInfo(I);
+  insertTodoType(I);
   return IntegerType::getInt8Ty(I->getContext());
 }
 
@@ -747,7 +755,7 @@ bool SPIRVEmitIntrinsics::deduceOperandElementTypeCalledFunction(
 // Try to deduce element type for a function pointer.
 void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
     CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
-    Type *&KnownElemTy, SmallPtrSet<Value *, 16> *Completed) {
+    Type *&KnownElemTy, bool IsPostprocessing) {
   Value *Op = CI->getCalledOperand();
   if (!Op || !isPointerTy(Op->getType()))
     return;
@@ -761,7 +769,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
       if (Type *ElemTy = GR->findDeducedElementType(Arg)) {
         IsNewFTy = true;
         ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy));
-        if (UncompleteTypeInfo.contains(Arg))
+        if (isTodoType(Arg))
           IsUncomplete = true;
       } else {
         IsUncomplete = true;
@@ -775,14 +783,14 @@ void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
       IsNewFTy = true;
       RetTy =
           TypedPointerType::get(ElemTy, getPointerAddressSpace(CI->getType()));
-      if (UncompleteTypeInfo.contains(CI))
+      if (isTodoType(CI))
         IsUncomplete = true;
     } else {
       IsUncomplete = true;
     }
   }
-  if (!Completed && IsUncomplete)
-    addToUncompleteTypeInfo(Op);
+  if (!IsPostprocessing && IsUncomplete)
+    insertTodoType(Op);
   KnownElemTy =
       IsNewFTy ? FunctionType::get(RetTy, ArgTys, FTy->isVarArg()) : FTy;
 }
@@ -793,14 +801,16 @@ void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
 // resolve the issue.
 void SPIRVEmitIntrinsics::deduceOperandElementType(
     Instruction *I, const SmallPtrSet<Value *, 4> *AskOps,
-    SmallPtrSet<Value *, 16> *Completed) {
+    bool IsPostprocessing) {
   SmallVector<std::pair<Value *, unsigned>> Ops;
   Type *KnownElemTy = nullptr;
+  bool Uncomplete = false;
   // look for known basic patterns of type inference
   if (auto *Ref = dyn_cast<PHINode>(I)) {
     if (!isPointerTy(I->getType()) ||
         !(KnownElemTy = GR->findDeducedElementType(I)))
       return;
+    Uncomplete = isTodoType(I);
     for (unsigned i = 0; i < Ref->getNumIncomingValues(); i++) {
       Value *Op = Ref->getIncomingValue(i);
       if (isPointerTy(Op->getType()))
@@ -810,6 +820,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     KnownElemTy = GR->findDeducedElementType(I);
     if (!KnownElemTy)
       return;
+    Uncomplete = isTodoType(I);
     Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
     KnownElemTy = Ref->getSourceElementType();
@@ -855,27 +866,29 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     if (!isPointerTy(I->getType()) ||
         !(KnownElemTy = GR->findDeducedElementType(I)))
       return;
+    Uncomplete = isTodoType(I);
     for (unsigned i = 0; i < Ref->getNumOperands(); i++) {
       Value *Op = Ref->getOperand(i);
       if (isPointerTy(Op->getType()))
         Ops.push_back(std::make_pair(Op, i));
     }
   } else if (auto *Ref = dyn_cast<ReturnInst>(I)) {
-    Type *RetTy = F->getReturnType();
+    Type *RetTy = CurrF->getReturnType();
     if (!isPointerTy(RetTy))
       return;
     Value *Op = Ref->getReturnValue();
     if (!Op)
       return;
-    if (!(KnownElemTy = GR->findDeducedElementType(F))) {
+    if (!(KnownElemTy = GR->findDeducedElementType(CurrF))) {
       if (Type *OpElemTy = GR->findDeducedElementType(Op)) {
-        GR->addDeducedElementType(F, OpElemTy);
+        GR->addDeducedElementType(CurrF, OpElemTy);
         TypedPointerType *DerivedTy =
             TypedPointerType::get(OpElemTy, getPointerAddressSpace(RetTy));
-        GR->addReturnType(F, DerivedTy);
+        GR->addReturnType(CurrF, DerivedTy);
       }
       return;
     }
+    Uncomplete = isTodoType(CurrF);
     Ops.push_back(std::make_pair(Op, 0));
   } else if (auto *Ref = dyn_cast<ICmpInst>(I)) {
     if (!isPointerTy(Ref->getOperand(0)->getType()))
@@ -886,34 +899,36 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     Type *ElemTy1 = GR->findDeducedElementType(Op1);
     if (ElemTy0) {
       KnownElemTy = ElemTy0;
+      Uncomplete = isTodoType(Op0);
       Ops.push_back(std::make_pair(Op1, 1));
     } else if (ElemTy1) {
       KnownElemTy = ElemTy1;
+      Uncomplete = isTodoType(Op1);
       Ops.push_back(std::make_pair(Op0, 0));
     }
   } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
     if (!CI->isIndirectCall())
       deduceOperandElementTypeCalledFunction(InstrSet, CI, Ops, KnownElemTy);
     else if (HaveFunPtrs)
-      deduceOperandElementTypeFunctionPointer(CI, Ops, KnownElemTy, Completed);
+      deduceOperandElementTypeFunctionPointer(CI, Ops, KnownElemTy,
+                                              IsPostprocessing);
   }
 
   // There is no enough info to deduce types or all is valid.
   if (!KnownElemTy || Ops.size() == 0)
     return;
 
-  LLVMContext &Ctx = F->getContext();
+  LLVMContext &Ctx = CurrF->getContext();
   IRBuilder<> B(Ctx);
   for (auto &OpIt : Ops) {
     Value *Op = OpIt.first;
     if (Op->use_empty())
       continue;
+    if (AskOps && !AskOps->contains(Op))
+      continue;
     Type *AskTy = nullptr;
     CallInst *AskCI = nullptr;
-    if (AskOps) {
-      auto It = AskOps->find(Op);
-      if (It == AskOps->end())
-        continue;
+    if (IsPostprocessing && AskOps) {
       AskTy = GR->findDeducedElementType(Op);
       AskCI = GR->findAssignPtrTypeInstr(Op);
       assert(AskTy && AskCI);
@@ -923,12 +938,14 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
       continue;
     Value *OpTyVal = PoisonValue::get(KnownElemTy);
     Type *OpTy = Op->getType();
-    if (!Ty || AskTy || isUntypedPointerTy(Ty) ||
-        UncompleteTypeInfo.contains(Op)) {
+    if (!Ty || (CanUpdateType &&
+                (AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)))) {
       GR->addDeducedElementType(Op, KnownElemTy);
       // check if KnownElemTy is complete
-      if (!Completed && UncompleteTypeInfo.contains(I))
-        addToUncompleteTypeInfo(Op);
+      if (!Uncomplete)
+        eraseTodoType(Op);
+      else if (!IsPostprocessing)
+        insertTodoType(Op);
       // check if there is existing Intrinsic::spv_assign_ptr_type instruction
       CallInst *AssignCI = AskCI ? AskCI : GR->findAssignPtrTypeInstr(Op);
       if (AssignCI == nullptr) {
@@ -940,10 +957,9 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
         GR->addAssignPtrTypeInstr(Op, CI);
       } else {
         updateAssignType(AssignCI, Op, OpTyVal);
-        if (Completed)
-          Completed->insert(Op);
       }
     } else {
+      eraseTodoType(Op);
       if (auto *OpI = dyn_cast<Instruction>(Op)) {
         // spv_ptrcast's argument Op denotes an instruction that generates
         // a value, and we may use getInsertionPointAfterDef()
@@ -953,7 +969,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
         B.SetInsertPointPastAllocas(OpA->getParent());
         B.SetCurrentDebugLocation(DebugLoc());
       } else {
-        B.SetInsertPoint(F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
+        B.SetInsertPoint(CurrF->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
       }
       SmallVector<Type *, 2> Types = {OpTy, OpTy};
       SmallVector<Value *, 2> Args = {Op, buildMD(OpTyVal),
@@ -993,7 +1009,7 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
 
 void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) {
   std::queue<Instruction *> Worklist;
-  for (auto &I : instructions(F))
+  for (auto &I : instructions(CurrF))
     Worklist.push(&I);
 
   while (!Worklist.empty()) {
@@ -1021,7 +1037,7 @@ void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) {
 
 void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) {
   std::queue<Instruction *> Worklist;
-  for (auto &I : instructions(F))
+  for (auto &I : instructions(CurrF))
     Worklist.push(&I);
 
   while (!Worklist.empty()) {
@@ -1080,7 +1096,7 @@ Instruction *SPIRVEmitIntrinsics::visitCallInst(CallInst &Call) {
     return &Call;
 
   const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
-  LLVMContext &Ctx = F->getContext();
+  LLVMContext &Ctx = CurrF->getContext();
 
   Constant *TyC = UndefValue::get(IA->getFunctionType());
   MDString *ConstraintString = MDString::get(Ctx, IA->getConstraintString());
@@ -1281,10 +1297,10 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
                                                          IRBuilder<> &B) {
   // Handle basic instructions:
   StoreInst *SI = dyn_cast<StoreInst>(I);
-  if (IsKernelArgInt8(F, SI)) {
+  if (IsKernelArgInt8(CurrF, SI)) {
     return replacePointerOperandWithPtrCast(
-        I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0,
-        B);
+        I, SI->getValueOperand(), IntegerType::getInt8Ty(CurrF->getContext()),
+        0, B);
   } else if (SI) {
     Value *Op = SI->getValueOperand();
     Type *OpTy = Op->getType();
@@ -1451,7 +1467,7 @@ Instruction *SPIRVEmitIntrinsics::visitLoadInst(LoadInst &I) {
   TrackConstants = false;
   const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
   MachineMemOperand::Flags Flags =
-      TLI->getLoadMemOperandFlags(I, F->getDataLayout());
+      TLI->getLoadMemOperandFlags(I, CurrF->getDataLayout());
   auto *NewI =
       B.CreateIntrinsic(Intrinsic::spv_load, {I.getOperand(0)->getType()},
                         {I.getPointerOperand(), B.getInt16(Flags),
@@ -1468,7 +1484,7 @@ Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) {
   TrackConstants = false;
   const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
   MachineMemOperand::Flags Flags =
-      TLI->getStoreMemOperandFlags(I, F->getDataLayout());
+      TLI->getStoreMemOperandFlags(I, CurrF->getDataLayout());
   auto *PtrOp = I.getPointerOperand();
   auto *NewI = B.CreateIntrinsic(
       Intrinsic::spv_store, {I.getValueOperand()->getType(), PtrOp->getType()},
@@ -1774,9 +1790,28 @@ void SPIRVEmitIntrinsics::processParamTypesByFunHeader(Function *F,
     if (!isUntypedPointerTy(Arg->getType()))
       continue;
     Type *ElemTy = GR->findDeducedElementType(Arg);
-    if (!ElemTy && hasPointeeTypeAttr(Arg) &&
-        (ElemTy = getPointeeTypeByAttr(Arg)) != nullptr)
+    if (ElemTy)
+      continue;
+    if (hasPointeeTypeAttr(Arg) &&
+        (ElemTy = getPointeeTypeByAttr(Arg)) != nullptr) {
       buildAssignPtr(B, ElemTy, Arg);
+      continue;
+    }
+    if (HaveFunPtrs) {
+      for (User *U : Arg->users()) {
+        CallInst *CI = dyn_cast<CallInst>(U);
+        if (CI && !isa<IntrinsicInst>(CI) && CI->isIndirectCall() &&
+            CI->getCalledOperand() == Arg &&
+            CI->getParent()->getParent() == CurrF) {
+          SmallVector<std::pair<Value *, unsigned>> Ops;
+          deduceOperandElementTypeFunctionPointer(CI, Ops, ElemTy, false);
+          if (ElemTy) {
+            buildAssignPtr(B, ElemTy, Arg);
+            break;
+          }
+        }
+      }
+    }
   }
 }
 
@@ -1877,17 +1912,17 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   InstrSet = ST.isOpenCLEnv() ? SPIRV::InstructionSet::OpenCL_std
                               : SPIRV::InstructionSet::GLSL_std_450;
 
-  if (!F)
+  if (!CurrF)
     HaveFunPtrs =
         ST.canUseExtension(SPIRV::Extension::SPV_INTEL_function_pointers);
 
-  F = &Func;
+  CurrF = &Func;
   IRBuilder<> B(Func.getContext());
   AggrConsts.clear();
   AggrConstTypes.clear();
   AggrStores.clear();
 
-  processParamTypesByFunHeader(F, B);
+  processParamTypesByFunHeader(CurrF, B);
 
   // StoreInst's operand type can be changed during the next transformations,
   // so we need to store it in the set. Also store already transformed types.
@@ -1936,7 +1971,8 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   // in meaning of `instructions(Func)`.
   for (BasicBlock &BB : Func)
     for (PHINode &Phi : BB.phis())
-      deduceOperandElementType(&Phi);
+      if (isPointerTy(Phi.getType()))
+        deduceOperandElementType(&Phi);
 
   for (auto *I : Worklist) {
     TrackConstants = true;
@@ -1980,63 +2016,65 @@ void SPIRVEmitIntrinsics::replaceWithPtrcasted(Instruction *CI, Type *NewElemTy,
 
 // Try to deduce a better type for pointers to untyped ptr.
 bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
-  if (!GR || UncompleteTypeInfo.size() == 0)
+  if (!GR || TodoTypeSz == 0)
     return false;
 
+  unsigned SzTodo = TodoTypeSz;
   DenseMap<Value *, SmallPtrSet<Value *, 4>> ToProcess;
-  SmallPtrSet<Value *, 16> Completed;
-  for (auto IB = PostprocessWorklist.rbegin(), IE = PostprocessWorklist.rend();
-       IB != IE; ++IB) {
-    CallInst *AssignCI = GR->findAssignPtrTypeInstr(*IB);
-    Type *KnownTy = GR->findDeducedElementType(*IB);
+  for (auto [Op, Enabled] : TodoType) {
+    if (!Enabled)
+      continue;
+    CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op);
+    Type *KnownTy = GR->findDeducedElementType(Op);
     if (!KnownTy || !AssignCI)
       continue;
-    assert(AssignCI->getArgOperand(0) == *IB);
+    assert(Op == AssignCI->getArgOperand(0));
     // Try to improve the type deduced after all Functions are processed.
-    if (auto *CI = dyn_cast<CallInst>(*IB)) {
+    if (auto *CI = dyn_cast<CallInst>(Op)) {
+      // TODO: deduceElementTypeHelper() & replaceWithPtrcasted() if
+      // isa<Instruction>(Op)
+      CurrF = CI->getParent()->getParent();
       if (Function *CalledF = CI->getCalledFunction()) {
         Type *RetElemTy = GR->findDeducedElementType(CalledF);
         // Fix inconsistency between known type and function's return type.
         if (RetElemTy && RetElemTy != KnownTy) {
           replaceWithPtrcasted(CI, RetElemTy, KnownTy, AssignCI);
-          Completed.insert(CI);
+          eraseTodoType(Op);
           continue;
         }
       }
     }
-    Value *Op = AssignCI->getArgOperand(0);
     for (User *U : Op->users()) {
       Instruction *Inst = dyn_cast<Instruction>(U);
       if (Inst && !isa<IntrinsicInst>(Inst))
         ToProcess[Inst].insert(Op);
     }
   }
-  if (Completed.size() >= UncompleteTypeInfo.size())
+  if (TodoTypeSz == 0)
     return true;
 
   for (auto &F : M) {
+    CurrF = &F;
     for (auto &I : llvm::reverse(instructions(F))) {
       auto It = ToProcess.find(&I);
       if (It == ToProcess.end())
         continue;
-      It->second.remove_if(
-          [&Completed](Value *V) { return Completed.contains(V); });
+      It->second.remove_if([this](Value *V) { return !isTodoType(V); });
       if (It->second.size() == 0)
         continue;
-      deduceOperandElementType(&I, &It->second, &Completed);
-      if (Completed.size() >= UncompleteTypeInfo.size())
+      deduceOperandElementType(&I, &It->second, true);
+      if (TodoTypeSz == 0)
         return true;
     }
   }
 
-  return Completed.size() > 0;
+  return SzTodo > TodoTypeSz;
 }
 
 bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
   bool Changed = false;
 
-  UncompleteTypeInfo.clear();
-  PostprocessWorklist.clear();
+  TodoType.clear();
   for (auto &F : M)
     Changed |= runOnFunction(F);
 
@@ -2049,7 +2087,20 @@ bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
     }
   }
 
+  CanTodoType = false;
   Changed |= postprocessTypes(M);
+
+  // Validation pass.
+  CanUpdateType = false;
+  TodoType.clear();
+  for (auto &F : M) {
+    CurrF = &F;
+    for (BasicBlock &BB : F)
+      for (PHINode &Phi : BB.phis())
+        if (isPointerTy(Phi.getType()))
+          deduceOperandElementType(&Phi, nullptr, true);
+  }
+
   if (HaveFunPtrs)
     Changed |= processFunctionPointers(M);
 

>From b4c11c500fb036bdc550b430dfe21f7299addc71 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 20 Nov 2024 04:57:31 -0800
Subject: [PATCH 05/17] fix function pointers and dealing with uncomplete types

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp     | 15 +--------------
 .../SPV_INTEL_function_pointers/fp_two_calls.ll   | 14 +++++++-------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 0625d4c1469ed2..7460e0a71aae51 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -77,7 +77,6 @@ class SPIRVEmitIntrinsics
 
   // a register of Instructions that don't have a complete type definition
   bool CanTodoType = true;
-  bool CanUpdateType = true;
   unsigned TodoTypeSz = 0;
   DenseMap<Value *, bool> TodoType;
   void insertTodoType(Value *Op) {
@@ -938,8 +937,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
       continue;
     Value *OpTyVal = PoisonValue::get(KnownElemTy);
     Type *OpTy = Op->getType();
-    if (!Ty || (CanUpdateType &&
-                (AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)))) {
+    if (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)) {
       GR->addDeducedElementType(Op, KnownElemTy);
       // check if KnownElemTy is complete
       if (!Uncomplete)
@@ -2090,17 +2088,6 @@ bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
   CanTodoType = false;
   Changed |= postprocessTypes(M);
 
-  // Validation pass.
-  CanUpdateType = false;
-  TodoType.clear();
-  for (auto &F : M) {
-    CurrF = &F;
-    for (BasicBlock &BB : F)
-      for (PHINode &Phi : BB.phis())
-        if (isPointerTy(Phi.getType()))
-          deduceOperandElementType(&Phi, nullptr, true);
-  }
-
   if (HaveFunPtrs)
     Changed |= processFunctionPointers(M);
 
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
index 621d06aa4aadee..1b217c3bb92f16 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
@@ -12,17 +12,17 @@
 ; CHECK-DAG: OpName %[[test:.*]] "test"
 ; CHECK-DAG: %[[TyVoid:.*]] = OpTypeVoid
 ; CHECK-DAG: %[[TyFloat32:.*]] = OpTypeFloat 32
-; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0
 ; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0
+; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0
 ; CHECK-DAG: %[[TyPtrInt8:.*]] = OpTypePointer Function %[[TyInt8]]
-; CHECK-DAG: %[[TyUncompleteBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrInt8]] %[[TyPtrInt8]]
-; CHECK-DAG: %[[TyPtrUncompleteBar:.*]] = OpTypePointer Function %[[TyUncompleteBar]]
-; CHECK-DAG: %[[TyUncompleteFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrUncompleteBar]]
+; CHECK-DAG: %[[TyUncompleteFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrInt8]]
 ; CHECK-DAG: %[[TyPtrUncompleteFp:.*]] = OpTypePointer Function %[[TyUncompleteFp]]
-; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrUncompleteFp]] %[[TyPtrInt8]]
-; CHECK-DAG: %[[TyPtrBar:.*]] = OpTypePointer Function %[[TyBar]]
-; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrBar]]
+; CHECK-DAG: %[[TyUncompleteBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrUncompleteFp]] %[[TyPtrInt8]]
+; CHECK-DAG: %[[TyPtrUncompleteBar:.*]] = OpTypePointer Function %[[TyUncompleteBar]]
+; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrUncompleteBar]]
 ; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]]
+; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrFp]] %[[TyPtrInt8]]
+; CHECK-DAG: %[[TyPtrBar:.*]] = OpTypePointer Function %[[TyBar]]
 ; CHECK-DAG: %[[TyTest:.*]] = OpTypeFunction %[[TyVoid]] %[[TyPtrFp]] %[[TyPtrInt8]] %[[TyPtrBar]]
 ; CHECK: %[[test]] = OpFunction %[[TyVoid]] None %[[TyTest]]
 ; CHECK: %[[fp]] = OpFunctionParameter %[[TyPtrFp]]

>From c64853195a9b2940b1a97db22453821a5c48ea1a Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 20 Nov 2024 05:44:26 -0800
Subject: [PATCH 06/17] widen search for uncomplete types

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 26 +++++++++----------
 .../SPIRV/pointers/phi-valid-operand-types.ll |  8 +++---
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 7460e0a71aae51..5e1e47455a9b31 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -105,7 +105,8 @@ class SPIRVEmitIntrinsics
   Type *deduceElementType(Value *I, bool UnknownElemTypeI8);
   Type *deduceElementTypeHelper(Value *I, bool UnknownElemTypeI8);
   Type *deduceElementTypeHelper(Value *I, std::unordered_set<Value *> &Visited,
-                                bool UnknownElemTypeI8);
+                                bool UnknownElemTypeI8,
+                                bool IgnoreKnownType = false);
   Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
                                      bool UnknownElemTypeI8);
   Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
@@ -482,14 +483,16 @@ void SPIRVEmitIntrinsics::maybeAssignPtrType(Type *&Ty, Value *Op, Type *RefTy,
 }
 
 Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
-    Value *I, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8) {
+    Value *I, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8,
+    bool IgnoreKnownType) {
   // allow to pass nullptr as an argument
   if (!I)
     return nullptr;
 
   // maybe already known
-  if (Type *KnownTy = GR->findDeducedElementType(I))
-    return KnownTy;
+  if (!IgnoreKnownType)
+    if (Type *KnownTy = GR->findDeducedElementType(I))
+      return KnownTy;
 
   // maybe a cycle
   if (!Visited.insert(I).second)
@@ -577,7 +580,7 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
   }
 
   // remember the found relationship
-  if (Ty) {
+  if (Ty && !IgnoreKnownType) {
     // specify nested types if needed, otherwise return unchanged
     GR->addDeducedElementType(I, Ty);
   }
@@ -2028,15 +2031,12 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
       continue;
     assert(Op == AssignCI->getArgOperand(0));
     // Try to improve the type deduced after all Functions are processed.
-    if (auto *CI = dyn_cast<CallInst>(Op)) {
-      // TODO: deduceElementTypeHelper() & replaceWithPtrcasted() if
-      // isa<Instruction>(Op)
+    if (auto *CI = dyn_cast<Instruction>(Op)) {
       CurrF = CI->getParent()->getParent();
-      if (Function *CalledF = CI->getCalledFunction()) {
-        Type *RetElemTy = GR->findDeducedElementType(CalledF);
-        // Fix inconsistency between known type and function's return type.
-        if (RetElemTy && RetElemTy != KnownTy) {
-          replaceWithPtrcasted(CI, RetElemTy, KnownTy, AssignCI);
+      std::unordered_set<Value *> Visited;
+      if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) {
+        if (ElemTy != KnownTy) {
+          replaceWithPtrcasted(CI, ElemTy, KnownTy, AssignCI);
           eraseTodoType(Op);
           continue;
         }
diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
index 07824d4ed6cd85..c2db50e7aa394d 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
 ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
 
-; CHECK: %[[#Char:]] = OpTypeInt 8 0
-; CHECK: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]]
-; CHECK: %[[#Int:]] = OpTypeInt 32 0
-; CHECK: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]]
+; CHECK-DAG: %[[#Char:]] = OpTypeInt 8 0
+; CHECK-DAG: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]]
+; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]]
 ; CHECK: %[[#R1:]] = OpFunctionCall %[[#PtrChar]] %[[#]]
 ; CHECK: %[[#R2:]] = OpFunctionCall %[[#PtrInt]] %[[#]]
 ; CHECK: %[[#Casted:]] = OpBitcast %[[#PtrChar]] %[[#R2]]

>From f9a9915f670bdbc4e65d4b6ce64df1457730569a Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Mon, 25 Nov 2024 08:22:10 -0800
Subject: [PATCH 07/17] improve type inference

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 201 +++++++++++++++---
 llvm/lib/Target/SPIRV/SPIRVUtils.h            |  12 +-
 .../type-deduce-via-store-load-args-rev.ll    |  64 ++++++
 .../SPIRV/transcoding/spirv-event-null.ll     |  13 ++
 4 files changed, 251 insertions(+), 39 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 5e1e47455a9b31..e9b3345fc3309b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -143,6 +143,9 @@ class SPIRVEmitIntrinsics
     return B.CreateIntrinsic(IntrID, {Types}, Args);
   }
 
+  Type *reconstructType(Value *Op, bool UnknownElemTypeI8,
+                        bool IsPostprocessing);
+
   void buildAssignType(IRBuilder<> &B, Type *ElemTy, Value *Arg);
   void buildAssignPtr(IRBuilder<> &B, Type *ElemTy, Value *Arg);
   void updateAssignType(CallInst *AssignCI, Value *Arg, Value *OfType);
@@ -168,8 +171,8 @@ class SPIRVEmitIntrinsics
                                   std::unordered_set<Function *> &FVisited);
 
   bool deduceOperandElementTypeCalledFunction(
-      SPIRV::InstructionSet::InstructionSet InstrSet, CallInst *CI,
-      SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy);
+      CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+      Type *&KnownElemTy);
   void deduceOperandElementTypeFunctionPointer(
       CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
       Type *&KnownElemTy, bool IsPostprocessing);
@@ -336,8 +339,11 @@ static inline Type *restoreMutatedType(SPIRVGlobalRegistry *GR, Instruction *I,
 
 // Reconstruct type with nested element types according to deduced type info.
 // Return nullptr if no detailed type info is available.
-static inline Type *reconstructType(SPIRVGlobalRegistry *GR, Value *Op) {
+Type *SPIRVEmitIntrinsics::reconstructType(Value *Op, bool UnknownElemTypeI8,
+                                           bool IsPostprocessing) {
   Type *Ty = Op->getType();
+  if (auto *OpI = dyn_cast<Instruction>(Op))
+    Ty = restoreMutatedType(GR, OpI, Ty);
   if (!isUntypedPointerTy(Ty))
     return Ty;
   // try to find the pointee type
@@ -345,10 +351,17 @@ static inline Type *reconstructType(SPIRVGlobalRegistry *GR, Value *Op) {
     return getTypedPointerWrapper(NestedTy, getPointerAddressSpace(Ty));
   // not a pointer according to the type info (e.g., Event object)
   CallInst *CI = GR->findAssignPtrTypeInstr(Op);
-  if (!CI)
-    return nullptr;
-  MetadataAsValue *MD = cast<MetadataAsValue>(CI->getArgOperand(1));
-  return cast<ConstantAsMetadata>(MD->getMetadata())->getType();
+  if (CI) {
+    MetadataAsValue *MD = cast<MetadataAsValue>(CI->getArgOperand(1));
+    return cast<ConstantAsMetadata>(MD->getMetadata())->getType();
+  }
+  if (UnknownElemTypeI8) {
+    if (!IsPostprocessing)
+      insertTodoType(Op);
+    return getTypedPointerWrapper(IntegerType::getInt8Ty(Op->getContext()),
+                                  getPointerAddressSpace(Ty));
+  }
+  return nullptr;
 }
 
 void SPIRVEmitIntrinsics::buildAssignType(IRBuilder<> &B, Type *Ty,
@@ -403,6 +416,7 @@ void SPIRVEmitIntrinsics::updateAssignType(CallInst *AssignCI, Value *Arg,
 
 // Set element pointer type to the given value of ValueTy and tries to
 // specify this type further (recursively) by Operand value, if needed.
+
 Type *
 SPIRVEmitIntrinsics::deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
                                                   bool UnknownElemTypeI8) {
@@ -504,7 +518,15 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
   if (auto *Ref = dyn_cast<AllocaInst>(I)) {
     maybeAssignPtrType(Ty, I, Ref->getAllocatedType(), UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
+    // TODO: Iterate the indices to find the return type if it's a pointer
     Ty = Ref->getResultElementType();
+  } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
+    Value *Op = Ref->getPointerOperand();
+    Type *KnownTy = GR->findDeducedElementType(Op);
+    if (!KnownTy)
+      KnownTy = Op->getType();
+    if (Type *ElemTy = getPointeeType(KnownTy))
+      maybeAssignPtrType(Ty, I, ElemTy, UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<GlobalValue>(I)) {
     Ty = deduceElementTypeByValueDeep(
         Ref->getValueType(),
@@ -702,8 +724,8 @@ static inline Type *getAtomicElemTy(SPIRVGlobalRegistry *GR, Instruction *I,
 // Try to deduce element type for a call base. Returns false if this is an
 // indirect function invocation, and true otherwise.
 bool SPIRVEmitIntrinsics::deduceOperandElementTypeCalledFunction(
-    SPIRV::InstructionSet::InstructionSet InstrSet, CallInst *CI,
-    SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy) {
+    CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+    Type *&KnownElemTy) {
   Function *CalledF = CI->getCalledFunction();
   if (!CalledF)
     return false;
@@ -825,6 +847,12 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     Uncomplete = isTodoType(I);
     Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
+    // TODO: GR->findDeducedElementType()
+    //  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+    //    return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(),
+    //                                            GEPI->getSourceElementType(),
+    //                                            0, B);
+    //  }
     KnownElemTy = Ref->getSourceElementType();
     if (isUntypedPointerTy(KnownElemTy))
       return;
@@ -845,7 +873,9 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
   } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
     if (IsKernelArgInt8(Ref->getParent()->getParent(), Ref))
       return;
-    if (!(KnownElemTy = reconstructType(GR, Ref->getValueOperand())))
+    if (!(KnownElemTy =
+              reconstructType(Ref->getValueOperand(),
+                              false /*UnknownElemTypeI8*/, IsPostprocessing)))
       return;
     Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
     if (PointeeTy && !isUntypedPointerTy(PointeeTy))
@@ -910,7 +940,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     }
   } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
     if (!CI->isIndirectCall())
-      deduceOperandElementTypeCalledFunction(InstrSet, CI, Ops, KnownElemTy);
+      deduceOperandElementTypeCalledFunction(CI, Ops, KnownElemTy);
     else if (HaveFunPtrs)
       deduceOperandElementTypeFunctionPointer(CI, Ops, KnownElemTy,
                                               IsPostprocessing);
@@ -964,8 +994,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
       if (auto *OpI = dyn_cast<Instruction>(Op)) {
         // spv_ptrcast's argument Op denotes an instruction that generates
         // a value, and we may use getInsertionPointAfterDef()
-        B.SetInsertPoint(*OpI->getInsertionPointAfterDef());
-        B.SetCurrentDebugLocation(OpI->getDebugLoc());
+        setInsertPointAfterDef(B, OpI);
       } else if (auto *OpA = dyn_cast<Argument>(Op)) {
         B.SetInsertPointPastAllocas(OpA->getParent());
         B.SetCurrentDebugLocation(DebugLoc());
@@ -1190,9 +1219,12 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
   Type *VTy = V->getType();
 
   // A couple of sanity checks.
-  assert(isPointerTy(VTy) && "Expect a pointer type!");
-  if (auto PType = dyn_cast<TypedPointerType>(VTy))
-    if (PType->getElementType() != AssignedType)
+  assert((isPointerTy(VTy) ||
+          (isa<TargetExtType>(VTy) &&
+           isTypedPointerWrapper(dyn_cast<TargetExtType>(VTy)))) &&
+         "Expect a pointer type!");
+  if (Type *ElemTy = getPointeeType(VTy))
+    if (ElemTy != AssignedType)
       report_fatal_error("Unexpected pointer element type!");
 
   CallInst *AssignCI = GR->findAssignPtrTypeInstr(V);
@@ -1235,7 +1267,8 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
     return;
 
   setInsertPointSkippingPhis(B, I);
-  MetadataAsValue *VMD = buildMD(PoisonValue::get(ExpectedElementType));
+  Value *ExpectedElementVal = PoisonValue::get(ExpectedElementType);
+  MetadataAsValue *VMD = buildMD(ExpectedElementVal);
   unsigned AddressSpace = getPointerAddressSpace(Pointer->getType());
   bool FirstPtrCastOrAssignPtrType = true;
 
@@ -1272,17 +1305,21 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
     return;
   }
 
-  // // Do not emit spv_ptrcast if it would cast to the default pointer element
-  // // type (i8) of the same address space.
-  // if (ExpectedElementType->isIntegerTy(8))
-  //   return;
-
-  // If this would be the first spv_ptrcast, do not emit spv_ptrcast and emit
-  // spv_assign_ptr_type instead.
-  if (FirstPtrCastOrAssignPtrType &&
-      (isa<Instruction>(Pointer) || isa<Argument>(Pointer))) {
-    buildAssignPtr(B, ExpectedElementType, Pointer);
-    return;
+  if (isa<Instruction>(Pointer) || isa<Argument>(Pointer)) {
+    if (FirstPtrCastOrAssignPtrType) {
+      // If this would be the first spv_ptrcast, do not emit spv_ptrcast and
+      // emit spv_assign_ptr_type instead.
+      buildAssignPtr(B, ExpectedElementType, Pointer);
+      return;
+    } else if (isTodoType(Pointer)) {
+      // If this wouldn't be the first spv_ptrcast but existing type info is
+      // incomplete, update spv_assign_ptr_type arguments.
+      if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer))
+        updateAssignType(AssignCI, Pointer, ExpectedElementVal);
+      else
+        buildAssignPtr(B, ExpectedElementType, Pointer);
+      return;
+    }
   }
 
   // Emit spv_ptrcast
@@ -1319,6 +1356,7 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
                                             GEPI->getSourceElementType(), 0, B);
   }
 
+  // TODO: review the following and maybe merge it with the existing logic:
   // Handle calls to builtins (non-intrinsics):
   CallInst *CI = dyn_cast<CallInst>(I);
   if (!CI || CI->isIndirectCall() || CI->isInlineAsm() ||
@@ -1336,8 +1374,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
     Type *ArgType = CalledArg->getType();
     if (!isPointerTy(ArgType)) {
       CalledArgTys.push_back(nullptr);
-    } else if (isTypedPointerTy(ArgType)) {
-      CalledArgTys.push_back(cast<TypedPointerType>(ArgType)->getElementType());
+    } else if (Type *ArgTypeElem = getPointeeType(ArgType)) {
+      CalledArgTys.push_back(ArgTypeElem);
       HaveTypes = true;
     } else {
       Type *ElemTy = GR->findDeducedElementType(CalledArg);
@@ -1620,7 +1658,53 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
         }
       }
     }
-  }
+  } /*else if (auto *Ref = dyn_cast<StoreInst>(I)) {
+    if (!IsKernelArgInt8(CurrF, Ref)) {
+      Type *ElemTy = reconstructType(Ref->getValueOperand(), false, false);
+      if (ElemTy) {
+        setInsertPointAfterDef(B, I);
+        buildAssignPtr(B, ElemTy, Ref->getPointerOperand());
+      }
+    }
+  } */ /*else if (auto *Ref = dyn_cast<StoreInst>(I)) {
+    if (!IsKernelArgInt8(CurrF, Ref)) {
+      Type *ElemTy =
+          reconstructType(Ref->getValueOperand(), true,
+                          false);
+      assert(ElemTy);
+      setInsertPointAfterDef(B, I);
+      buildAssignPtr(B, ElemTy, Ref->getPointerOperand());
+    }
+  } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
+    if (IsKernelArgInt8(CurrF, Ref)) {
+      // TODO: rework this outdated call
+      replacePointerOperandWithPtrCast(
+          I, Ref->getPointerOperand(),
+          IntegerType::getInt8Ty(CurrF->getContext()), 0, B);
+    } else {
+      Type *ElemTy = reconstructType(GR, Ref->getValueOperand(), true);
+      assert(ElemTy);
+      setInsertPointAfterDef(B, I);
+      buildAssignPtr(B, ElemTy, Ref->getPointerOperand());
+    }
+  } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
+    Value *Op = Ref->getPointerOperand();
+    Type *ElemTy = Ref->getSourceElementType();
+    if (isUntypedPointerTy(ElemTy))
+      insertTodoType(Op);
+    setInsertPointAfterDef(B, I);
+    buildAssignPtr(B, ElemTy, Op);
+    // TODO: rework this outdated call
+    // replacePointerOperandWithPtrCast(I, Op,
+    //                                 ElemTy, 0, B);
+    // Ty = Ref->getResultElementType();
+    //  TODO: GR->findDeduceElementType()
+    //   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+    //     return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(),
+    //                                             GEPI->getSourceElementType(),
+    //                                             0, B);
+    //   }
+  }*/
 
   Type *Ty = I->getType();
   if (!IsKnown && !Ty->isVoidTy() && !isPointerTy(Ty) && requireAssignType(I)) {
@@ -1651,8 +1735,9 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
         GR->addAssignPtrTypeInstr(Op, AssignCI);
       } else if (!isa<Instruction>(Op)) {
         Type *OpTy = Op->getType();
-        if (auto PType = dyn_cast<TypedPointerType>(OpTy)) {
-          buildAssignPtr(B, PType->getElementType(), Op);
+        Type *OpTyElem = getPointeeType(OpTy);
+        if (OpTyElem) {
+          buildAssignPtr(B, OpTyElem, Op);
         } else if (isPointerTy(OpTy)) {
           Type *ElemTy = GR->findDeducedElementType(Op);
           buildAssignPtr(B, ElemTy ? ElemTy : deduceElementType(Op, true), Op);
@@ -1798,6 +1883,27 @@ void SPIRVEmitIntrinsics::processParamTypesByFunHeader(Function *F,
       buildAssignPtr(B, ElemTy, Arg);
       continue;
     }
+    // search in function's call sites
+    for (User *U : F->users()) {
+      CallInst *CI = dyn_cast<CallInst>(U);
+      if (!CI || OpIdx >= CI->arg_size())
+        continue;
+      Value *OpArg = CI->getArgOperand(OpIdx);
+      if (!isPointerTy(OpArg->getType()))
+        continue;
+      // maybe we already know operand's element type
+      if ((ElemTy = GR->findDeducedElementType(OpArg)) != nullptr)
+        break;
+    }
+    if (ElemTy) {
+      buildAssignPtr(B, ElemTy, Arg);
+      continue;
+    }
+    // ElemTy = deduceFunParamElementType(F, OpIdx);
+    // if (ElemTy) {
+    //   buildAssignPtr(B, ElemTy, Arg);
+    //   continue;
+    // }
     if (HaveFunPtrs) {
       for (User *U : Arg->users()) {
         CallInst *CI = dyn_cast<CallInst>(U);
@@ -1813,6 +1919,33 @@ void SPIRVEmitIntrinsics::processParamTypesByFunHeader(Function *F,
         }
       }
     }
+    /*
+        for (User *U : Arg->users()) {
+          if (CallInst *CI = dyn_cast<CallInst>(U)) {
+            if (!CI->isIndirectCall())
+              deduceOperandElementTypeCalledFunction(CI, Ops, ElemTy);
+            else if (HaveFunPtrs)
+              deduceOperandElementTypeFunctionPointer(CI, Ops, ElemTy, false);
+          }
+        }
+    */
+    /*
+        if (HaveFunPtrs) {
+          for (User *U : Arg->users()) {
+            CallInst *CI = dyn_cast<CallInst>(U);
+            if (CI && !isa<IntrinsicInst>(CI) && CI->isIndirectCall() &&
+                CI->getCalledOperand() == Arg &&
+                CI->getParent()->getParent() == CurrF) {
+              SmallVector<std::pair<Value *, unsigned>> Ops;
+              deduceOperandElementTypeFunctionPointer(CI, Ops, ElemTy, false);
+              if (ElemTy) {
+                buildAssignPtr(B, ElemTy, Arg);
+                break;
+              }
+            }
+          }
+        }
+    */
   }
 }
 
@@ -1998,7 +2131,6 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
 void SPIRVEmitIntrinsics::replaceWithPtrcasted(Instruction *CI, Type *NewElemTy,
                                                Type *KnownElemTy,
                                                CallInst *AssignCI) {
-  updateAssignType(AssignCI, CI, PoisonValue::get(NewElemTy));
   IRBuilder<> B(CI->getContext());
   B.SetInsertPoint(*CI->getInsertionPointAfterDef());
   B.SetCurrentDebugLocation(CI->getDebugLoc());
@@ -2036,6 +2168,7 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
       std::unordered_set<Value *> Visited;
       if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) {
         if (ElemTy != KnownTy) {
+          updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
           replaceWithPtrcasted(CI, ElemTy, KnownTy, AssignCI);
           eraseTodoType(Op);
           continue;
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 5ed2303f4e3d3b..94c4c21b5cbbe6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -295,11 +295,13 @@ inline Type *applyWrappers(Type *Ty) {
 }
 
 inline Type *getPointeeType(Type *Ty) {
-  if (auto PType = dyn_cast<TypedPointerType>(Ty))
-    return PType->getElementType();
-  else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
-    if (isTypedPointerWrapper(ExtTy))
-      return applyWrappers(ExtTy->getTypeParameter(0));
+  if (Ty) {
+    if (auto PType = dyn_cast<TypedPointerType>(Ty))
+      return PType->getElementType();
+    else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
+      if (isTypedPointerWrapper(ExtTy))
+        return applyWrappers(ExtTy->getTypeParameter(0));
+  }
   return nullptr;
 }
 
diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll
new file mode 100644
index 00000000000000..0cceb100581c11
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll
@@ -0,0 +1,64 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - --translator-compatibility-mode | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: OpName %[[#Bar:]] "bar"
+; CHECK-DAG: OpName %[[#Foo:]] "foo"
+; CHECK-DAG: OpName %[[#Test:]] "test"
+; CHECK-DAG: %[[#Void:]] = OpTypeVoid
+; CHECK-DAG: %[[#Long:]] = OpTypeInt 64 0
+; CHECK-DAG: %[[#LongArr:]] = OpTypeArray %[[#Long]] %[[#]]
+; CHECK-DAG: %[[#StructLongArr:]] = OpTypeStruct %[[#LongArr]]
+; CHECK-DAG: %[[#Struct:]] = OpTypeStruct %[[#StructLongArr]]
+; CHECK-DAG: %[[#StructGenPtr:]] = OpTypePointer Generic %[[#Struct]]
+; CHECK-DAG: %[[#StructFunPtr:]] = OpTypePointer Function %[[#Struct]]
+; CHECK-DAG: %[[#StructGenGenPtr:]] = OpTypePointer Generic %[[#StructGenPtr]]
+; CHECK-DAG: %[[#StructFunGenPtr:]] = OpTypePointer Function %[[#StructGenPtr]]
+
+; CHECK: %[[#Bar]] = OpFunction
+; CHECK: %[[#BarVar:]] = OpVariable %[[#StructFunPtr]] Function
+; CHECK: %[[#BarVarToGen:]] = OpPtrCastToGeneric %[[#StructGenPtr]] %[[#BarVar]]
+; CHECK: %[[#]] = OpFunctionCall %[[#Void]] %[[#Foo]] %[[#BarVarToGen]]
+
+; CHECK: %[[#Foo]] = OpFunction
+; CHECK: %[[#FooArg1:]] = OpFunctionParameter %[[#StructGenPtr]]
+; CHECK: %[[#FooVar:]] = OpVariable %[[#StructFunGenPtr]] Function
+; CHECK: %[[#FooVarToGen:]] = OpPtrCastToGeneric %[[#StructGenGenPtr]] %[[#FooVar]]
+; CHECK: OpStore %[[#FooVarToGen]] %[[#FooArg1]]
+; CHECK: %[[#FooLoad:]] = OpLoad %[[#StructGenPtr]] %[[#FooVarToGen]]
+; CHECK: %[[#]] = OpFunctionCall %[[#Void]] %[[#Test]] %[[#FooLoad]]
+
+; CHECK: %[[#Test]] = OpFunction
+; CHECK: %[[#TestArg1:]] = OpFunctionParameter %[[#StructGenPtr]]
+; CHECK: %[[#TestVar:]] = OpVariable %[[#StructFunGenPtr]] Function
+; CHECK: %[[#TestVarToGen:]] = OpPtrCastToGeneric %[[#StructGenGenPtr]] %[[#TestVar]]
+; CHECK: OpStore %[[#TestVarToGen]] %[[#TestArg1]]
+
+%t_range = type { %t_arr }
+%t_arr = type { [1 x i64] }
+
+define internal spir_func void @bar() {
+  %GlobalOffset = alloca %t_range, align 8
+  %GlobalOffset.ascast = addrspacecast ptr %GlobalOffset to ptr addrspace(4)
+  call spir_func void @foo(ptr addrspace(4) noundef align 8 dereferenceable(8) %GlobalOffset.ascast)
+  ret void
+}
+
+define internal spir_func void @foo(ptr addrspace(4) noundef align 8 dereferenceable(8) %Offset) {
+entry:
+  %Offset.addr = alloca ptr addrspace(4), align 8
+  %Offset.addr.ascast = addrspacecast ptr %Offset.addr to ptr addrspace(4)
+  store ptr addrspace(4) %Offset, ptr addrspace(4) %Offset.addr.ascast, align 8
+  %r2 = load ptr addrspace(4), ptr addrspace(4) %Offset.addr.ascast, align 8
+  call spir_func void @test(ptr addrspace(4) noundef align 8 dereferenceable(8) %r2)
+  ret void
+}
+
+define void @test(ptr addrspace(4) noundef align 8 dereferenceable(8) %offset) {
+  %offset.addr = alloca ptr addrspace(4), align 8
+  %offset.addr.ascast = addrspacecast ptr %offset.addr to ptr addrspace(4)
+  store ptr addrspace(4) %offset, ptr addrspace(4) %offset.addr.ascast, align 8
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
index fcb61911e0d292..e512f909cfd059 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll
@@ -64,6 +64,19 @@ declare dso_local spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU
 ; CHECK: OpGroupWaitEvents %[[#]] %[[#]] %[[#EventVarBarGen]]
 ; CHECK: OpFunctionEnd
 
+; CHECK2: OpFunction
+; CHECK2: %[[#BarArg1:]] = OpFunctionParameter %[[#TyPtrSV4_W]]
+; CHECK2: %[[#BarArg2:]] = OpFunctionParameter %[[#TyPtrSV4_CW]]
+; CHECK2: %[[#EventVarBar:]] = OpVariable %[[#TyEventPtr]] Function
+; CHECK2: %[[#SrcBar:]] = OpInBoundsPtrAccessChain %[[#TyPtrSV4_CW]] %[[#BarArg2]] %[[#]]
+; CHECK2-DAG: %[[#BarArg1Casted:]] = OpBitcast %[[#TyPtrV4_W]] %[[#BarArg1]]
+; CHECK2-DAG: %[[#SrcBarCasted:]] = OpBitcast %[[#TyPtrV4_CW]] %[[#SrcBar]]
+; CHECK2: %[[#ResBar:]] = OpGroupAsyncCopy %[[#TyEvent]] %[[#]] %[[#BarArg1Casted]] %[[#SrcBarCasted]] %[[#]] %[[#]] %[[#ConstEvent]]
+; CHECK2: OpStore %[[#EventVarBar]] %[[#ResBar]]
+; CHECK2: %[[#EventVarBarGen:]] = OpPtrCastToGeneric %[[#TyEventPtrGen]] %[[#EventVarBar]]
+; CHECK2: OpGroupWaitEvents %[[#]] %[[#]] %[[#EventVarBarGen]]
+; CHECK2: OpFunctionEnd
+
 %Vec4 = type { <4 x i8> }
 
 define spir_kernel void @bar(ptr addrspace(3) %_arg_Local, ptr addrspace(1) readonly %_arg) {

>From 9ba240b12b1bfb0216a4ed9555b263147a5bddea Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Tue, 26 Nov 2024 09:05:09 -0800
Subject: [PATCH 08/17] improve type inference

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 197 +++++++++++++-----
 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp |  45 ++--
 llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp   |   1 +
 llvm/lib/Target/SPIRV/SPIRVUtils.cpp          |  23 +-
 llvm/lib/Target/SPIRV/SPIRVUtils.h            |  10 +-
 .../SPV_INTEL_function_pointers/fp_const.ll   |  12 +-
 .../SPIRV/pointers/builtin-ret-reg-type.ll    |   7 +-
 7 files changed, 204 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index e9b3345fc3309b..748af7df9e877e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -62,6 +62,10 @@ inline MetadataAsValue *buildMD(Value *Arg) {
       Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(Arg)));
 }
 
+inline bool mayUpdateOpType(Value *Op) {
+  return !(isa<CallInst>(Op) || isa<GetElementPtrInst>(Op));
+}
+
 class SPIRVEmitIntrinsics
     : public ModulePass,
       public InstVisitor<SPIRVEmitIntrinsics, Instruction *> {
@@ -97,6 +101,11 @@ class SPIRVEmitIntrinsics
     auto It = TodoType.find(Op);
     return It != TodoType.end() && It->second;
   }
+  // bool mayUpdateOpType(Value *Op) {
+  //   if (isa<CallInst>(Op) || isa<GetElementPtrInst>(Op))
+  //     return false;
+  //   return isTodoType(Op);
+  // }
 
   // well known result types of builtins
   enum WellKnownTypes { Event };
@@ -177,8 +186,9 @@ class SPIRVEmitIntrinsics
       CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
       Type *&KnownElemTy, bool IsPostprocessing);
 
-  void replaceWithPtrcasted(Instruction *CI, Type *NewElemTy, Type *KnownElemTy,
-                            CallInst *AssignCI);
+  CallInst *buildSpvPtrcast(Instruction *I, Type *ElemTy);
+  void propagateElemTypeInUses(Instruction *I, Type *ElemTy);
+
   void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true);
 
   bool runOnFunction(Function &F);
@@ -397,6 +407,7 @@ void SPIRVEmitIntrinsics::buildAssignPtr(IRBuilder<> &B, Type *ElemTy,
     GR->addDeducedElementType(Arg, ElemTy);
     GR->addAssignPtrTypeInstr(Arg, AssignPtrTyCI);
   } else {
+    assert(mayUpdateOpType(Arg) && "Forbidden to update assigned type");
     updateAssignType(AssignPtrTyCI, Arg, OfType);
   }
 }
@@ -518,8 +529,11 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
   if (auto *Ref = dyn_cast<AllocaInst>(I)) {
     maybeAssignPtrType(Ty, I, Ref->getAllocatedType(), UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
-    // TODO: Iterate the indices to find the return type if it's a pointer
     Ty = Ref->getResultElementType();
+    if (isNestedPointer(Ty)) {
+      for (Use &U : drop_begin(Ref->indices()))
+        Ty = GetElementPtrInst::getTypeAtIndex(Ty, U.get());
+    }
   } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
     Value *Op = Ref->getPointerOperand();
     Type *KnownTy = GR->findDeducedElementType(Op);
@@ -598,6 +612,12 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
                                      Visited, UnknownElemTypeI8);
       else if (Type *KnownRetTy = GR->findDeducedElementType(CalledF))
         Ty = KnownRetTy;
+      /*
+            else {
+              Ty = IntegerType::getInt8Ty(I->getContext());
+              insertTodoType(I);
+            }
+      */
     }
   }
 
@@ -644,7 +664,7 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
         if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
           if (Type *NestedTy =
                   deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
-            Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
+            Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
         } else {
           Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
                                       UnknownElemTypeI8);
@@ -665,7 +685,7 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
       if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
         if (Type *NestedTy =
                 deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
-          Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
+          Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
       } else {
         Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
                                     UnknownElemTypeI8);
@@ -792,7 +812,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
     if (ArgTy->isPointerTy()) {
       if (Type *ElemTy = GR->findDeducedElementType(Arg)) {
         IsNewFTy = true;
-        ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy));
+        ArgTy = getTypedPointerWrapper(ElemTy, getPointerAddressSpace(ArgTy));
         if (isTodoType(Arg))
           IsUncomplete = true;
       } else {
@@ -806,7 +826,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
     if (Type *ElemTy = GR->findDeducedElementType(CI)) {
       IsNewFTy = true;
       RetTy =
-          TypedPointerType::get(ElemTy, getPointerAddressSpace(CI->getType()));
+          getTypedPointerWrapper(ElemTy, getPointerAddressSpace(CI->getType()));
       if (isTodoType(CI))
         IsUncomplete = true;
     } else {
@@ -847,18 +867,11 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     Uncomplete = isTodoType(I);
     Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
-    // TODO: GR->findDeduceElementType()
-    //  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
-    //    return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(),
-    //                                            GEPI->getSourceElementType(),
-    //                                            0, B);
-    //  }
-    KnownElemTy = Ref->getSourceElementType();
-    if (isUntypedPointerTy(KnownElemTy))
-      return;
-    Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
-    if (PointeeTy && !isUntypedPointerTy(PointeeTy))
+    // TODO: ensure that Ref->getPointerOperand() has
+    // Ref->getSourceElementType()
+    if (GR->findDeducedElementType(Ref->getPointerOperand()))
       return;
+    KnownElemTy = Ref->getSourceElementType();
     Ops.push_back(std::make_pair(Ref->getPointerOperand(),
                                  GetElementPtrInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
@@ -871,8 +884,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     Ops.push_back(std::make_pair(Ref->getPointerOperand(),
                                  LoadInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
-    if (IsKernelArgInt8(Ref->getParent()->getParent(), Ref))
-      return;
+    // if (IsKernelArgInt8(Ref->getParent()->getParent(), Ref))
+    //   return;
     if (!(KnownElemTy =
               reconstructType(Ref->getValueOperand(),
                               false /*UnknownElemTypeI8*/, IsPostprocessing)))
@@ -914,9 +927,19 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     if (!(KnownElemTy = GR->findDeducedElementType(CurrF))) {
       if (Type *OpElemTy = GR->findDeducedElementType(Op)) {
         GR->addDeducedElementType(CurrF, OpElemTy);
-        TypedPointerType *DerivedTy =
-            TypedPointerType::get(OpElemTy, getPointerAddressSpace(RetTy));
-        GR->addReturnType(CurrF, DerivedTy);
+        GR->addReturnType(CurrF, TypedPointerType::get(
+                                     OpElemTy, getPointerAddressSpace(RetTy)));
+        for (User *U : CurrF->users()) {
+          CallInst *CI = dyn_cast<CallInst>(U);
+          if (!CI || CI->getCalledFunction() != CurrF)
+            continue;
+          if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(CI)) {
+            if (Type *PrevElemTy = GR->findDeducedElementType(CI)) {
+              updateAssignType(AssignCI, CI, PoisonValue::get(OpElemTy));
+              propagateElemTypeInUses(CI, PrevElemTy);
+            }
+          }
+        }
       }
       return;
     }
@@ -970,7 +993,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
       continue;
     Value *OpTyVal = PoisonValue::get(KnownElemTy);
     Type *OpTy = Op->getType();
-    if (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)) {
+    if ( // mayUpdateOpType(Op) &&
+        (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op))) {
       GR->addDeducedElementType(Op, KnownElemTy);
       // check if KnownElemTy is complete
       if (!Uncomplete)
@@ -1006,11 +1030,11 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
                                       B.getInt32(getPointerAddressSpace(OpTy))};
       CallInst *PtrCastI =
           B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
+      buildAssignPtr(B, KnownElemTy, PtrCastI);
       if (OpIt.second == std::numeric_limits<unsigned>::max())
         dyn_cast<CallInst>(I)->setCalledOperand(PtrCastI);
       else
         I->setOperand(OpIt.second, PtrCastI);
-      buildAssignPtr(B, KnownElemTy, PtrCastI);
     }
   }
 }
@@ -1249,6 +1273,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
 
   // Our previous guess about the type seems to be wrong, let's update
   // inferred type according to a new, more precise type information.
+  assert(mayUpdateOpType(V) && "Forbidden to update assigned type");
   updateAssignType(AssignCI, V, PoisonValue::get(AssignedType));
 }
 
@@ -1312,13 +1337,16 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
       buildAssignPtr(B, ExpectedElementType, Pointer);
       return;
     } else if (isTodoType(Pointer)) {
-      // If this wouldn't be the first spv_ptrcast but existing type info is
-      // uncomplete, update spv_assign_ptr_type arguments.
-      if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer))
-        updateAssignType(AssignCI, Pointer, ExpectedElementVal);
-      else
-        buildAssignPtr(B, ExpectedElementType, Pointer);
-      return;
+      eraseTodoType(Pointer);
+      if (mayUpdateOpType(Pointer)) {
+        //  If this wouldn't be the first spv_ptrcast but existing type info is
+        //  uncomplete, update spv_assign_ptr_type arguments.
+        if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer))
+          updateAssignType(AssignCI, Pointer, ExpectedElementVal);
+        else
+          buildAssignPtr(B, ExpectedElementType, Pointer);
+        return;
+      }
     }
   }
 
@@ -1336,10 +1364,11 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
   // Handle basic instructions:
   StoreInst *SI = dyn_cast<StoreInst>(I);
   if (IsKernelArgInt8(CurrF, SI)) {
-    return replacePointerOperandWithPtrCast(
+    replacePointerOperandWithPtrCast(
         I, SI->getValueOperand(), IntegerType::getInt8Ty(CurrF->getContext()),
         0, B);
-  } else if (SI) {
+  }
+  if (SI) {
     Value *Op = SI->getValueOperand();
     Type *OpTy = Op->getType();
     if (auto *OpI = dyn_cast<Instruction>(Op))
@@ -1349,11 +1378,45 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
     return replacePointerOperandWithPtrCast(I, SI->getPointerOperand(), OpTy, 1,
                                             B);
   } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    return replacePointerOperandWithPtrCast(I, LI->getPointerOperand(),
-                                            LI->getType(), 0, B);
+    Value *Pointer = LI->getPointerOperand();
+    Type *OpTy = LI->getType();
+    if (auto *PtrTy = dyn_cast<PointerType>(
+            OpTy)) { // TODO: isNestedPointer or rather getNestedPointerAS()
+      if (Type *ElemTy = GR->findDeducedElementType(LI)) {
+        OpTy = getTypedPointerWrapper(ElemTy, PtrTy->getAddressSpace());
+      } else {
+        Type *NewOpTy = OpTy;
+        OpTy = deduceElementTypeByValueDeep(OpTy, LI, false);
+        if (OpTy == NewOpTy)
+          insertTodoType(Pointer);
+      }
+    }
+    return replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
   } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
-    return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(),
-                                            GEPI->getSourceElementType(), 0, B);
+    /*
+        Value *Pointer = GEPI->getPointerOperand();
+        Type *OpTy = GEPI->getSourceElementType();
+        if (auto *PtrTy = dyn_cast<PointerType>(
+                OpTy)) { // TODO: isNestedPointer or rather getNestedPointerAS()
+          if (Type *ElemTy = GR->findDeducedElementType(Pointer)) {
+            return;
+          } else {
+            if (Type *ElemTy = deduceElementTypeHelper(Pointer, false))
+              OpTy = ElemTy;
+            else
+              insertTodoType(Pointer);
+          }
+        }
+        //::getPointeeType(OpTy)
+        // isNestedPointer
+        return replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
+    */
+    Value *Pointer = GEPI->getPointerOperand();
+    Type *OpTy = GEPI->getSourceElementType();
+    replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
+    if (isNestedPointer(OpTy))
+      insertTodoType(Pointer);
+    return;
   }
 
   // TODO: review and maybe merge with existing logics the following ...:
@@ -1971,7 +2034,7 @@ static FunctionType *getFunctionPointerElemType(Function *F,
     if (ArgTy->isPointerTy())
       if (Type *ElemTy = GR->findDeducedElementType(&Arg)) {
         IsNewFTy = true;
-        ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy));
+        ArgTy = getTypedPointerWrapper(ElemTy, getPointerAddressSpace(ArgTy));
       }
     ArgTys.push_back(ArgTy);
   }
@@ -2128,23 +2191,33 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   return true;
 }
 
-void SPIRVEmitIntrinsics::replaceWithPtrcasted(Instruction *CI, Type *NewElemTy,
-                                               Type *KnownElemTy,
-                                               CallInst *AssignCI) {
-  IRBuilder<> B(CI->getContext());
-  B.SetInsertPoint(*CI->getInsertionPointAfterDef());
-  B.SetCurrentDebugLocation(CI->getDebugLoc());
-  Type *OpTy = CI->getType();
+CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Instruction *I, Type *ElemTy) {
+  IRBuilder<> B(I->getContext());
+  B.SetInsertPoint(*I->getInsertionPointAfterDef());
+  B.SetCurrentDebugLocation(I->getDebugLoc());
+  Type *OpTy = I->getType();
   SmallVector<Type *, 2> Types = {OpTy, OpTy};
-  SmallVector<Value *, 2> Args = {CI, buildMD(PoisonValue::get(KnownElemTy)),
+  SmallVector<Value *, 2> Args = {I, buildMD(PoisonValue::get(ElemTy)),
                                   B.getInt32(getPointerAddressSpace(OpTy))};
   CallInst *PtrCasted =
       B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
-  SmallVector<User *> Users(CI->users());
-  for (auto *U : Users)
-    if (U != AssignCI && U != PtrCasted)
-      U->replaceUsesOfWith(CI, PtrCasted);
-  buildAssignPtr(B, KnownElemTy, PtrCasted);
+  buildAssignPtr(B, ElemTy, PtrCasted);
+  return PtrCasted;
+}
+
+void SPIRVEmitIntrinsics::propagateElemTypeInUses(Instruction *I,
+                                                  Type *ElemTy) {
+  CallInst *PtrCasted = buildSpvPtrcast(I, ElemTy);
+  SmallVector<User *> Users(I->users());
+  for (auto *U : Users) {
+    if (isa<BitCastInst>(U) || isa<GetElementPtrInst>(U))
+      continue;
+    if (const auto *II = dyn_cast<IntrinsicInst>(U))
+      if (Function *F = II->getCalledFunction())
+        if (F->getName().starts_with("llvm.spv."))
+          continue;
+    U->replaceUsesOfWith(I, PtrCasted);
+  }
 }
 
 // Try to deduce a better type for pointers to untyped ptr.
@@ -2168,8 +2241,12 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
       std::unordered_set<Value *> Visited;
       if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) {
         if (ElemTy != KnownTy) {
-          updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
-          replaceWithPtrcasted(CI, ElemTy, KnownTy, AssignCI);
+          if (mayUpdateOpType(CI)) {
+            updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
+            propagateElemTypeInUses(CI, KnownTy);
+          } else {
+            propagateElemTypeInUses(CI, ElemTy);
+          }
           eraseTodoType(Op);
           continue;
         }
@@ -2212,6 +2289,7 @@ bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
   // Specify function parameters after all functions were processed.
   for (auto &F : M) {
     // check if function parameter types are set
+    CurrF = &F;
     if (!F.isDeclaration() && !F.isIntrinsic()) {
       IRBuilder<> B(F.getContext());
       processParamTypes(&F, B);
@@ -2223,7 +2301,16 @@ bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
 
   if (HaveFunPtrs)
     Changed |= processFunctionPointers(M);
-
+  /*
+    TodoType.clear();
+    TodoTypeSz = 0;
+    std::unordered_set<Value *> Visited;
+    for (auto &F : M) {
+      CurrF = &F;
+      for (auto &I : instructions(F))
+        deduceOperandElementType(&I, nullptr, true);
+    }
+  */
   return Changed;
 }
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 4e539fcd6c9999..9ac659f6b4f111 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -28,6 +28,18 @@
 #include <functional>
 
 using namespace llvm;
+
+// Get the address space of any pointer-like type; abort on other types.
+static unsigned typeToAddressSpace(const Type *Ty) {
+  if (auto *PType = dyn_cast<TypedPointerType>(Ty))
+    return PType->getAddressSpace();
+  if (auto *PType = dyn_cast<PointerType>(Ty))
+    return PType->getAddressSpace();
+  if (auto *ET = dyn_cast<TargetExtType>(Ty); ET && isTypedPointerWrapper(ET))
+    return ET->getIntParameter(0);
+  report_fatal_error("Unable to convert LLVM type to SPIRVType", true);
+}
+
 SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize)
     : PointerSize(PointerSize), Bound(0) {}
 
@@ -570,13 +582,13 @@ Register
 SPIRVGlobalRegistry::getOrCreateConstNullPtr(MachineIRBuilder &MIRBuilder,
                                              SPIRVType *SpvType) {
   const Type *LLVMTy = getTypeForSPIRVType(SpvType);
-  const TypedPointerType *LLVMPtrTy = cast<TypedPointerType>(LLVMTy);
+  unsigned AddressSpace = typeToAddressSpace(LLVMTy);
   // Find a constant in DT or build a new one.
-  Constant *CP = ConstantPointerNull::get(PointerType::get(
-      LLVMPtrTy->getElementType(), LLVMPtrTy->getAddressSpace()));
+  Constant *CP = ConstantPointerNull::get(
+      PointerType::get(::getPointeeType(LLVMTy), AddressSpace));
   Register Res = DT.find(CP, CurMF);
   if (!Res.isValid()) {
-    LLT LLTy = LLT::pointer(LLVMPtrTy->getAddressSpace(), PointerSize);
+    LLT LLTy = LLT::pointer(AddressSpace, PointerSize);
     Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
     CurMF->getRegInfo().setRegClass(Res, &SPIRV::pIDRegClass);
     assignSPIRVTypeToVReg(SpvType, Res, *CurMF);
@@ -978,18 +990,11 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(
     }
     return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder);
   }
-  unsigned AddrSpace = 0xFFFF;
-  if (auto PType = dyn_cast<TypedPointerType>(Ty))
-    AddrSpace = PType->getAddressSpace();
-  else if (auto PType = dyn_cast<PointerType>(Ty))
-    AddrSpace = PType->getAddressSpace();
-  else
-    report_fatal_error("Unable to convert LLVM type to SPIRVType", true);
 
+  unsigned AddrSpace = typeToAddressSpace(Ty);
   SPIRVType *SpvElementType = nullptr;
-  if (auto PType = dyn_cast<TypedPointerType>(Ty))
-    SpvElementType = getOrCreateSPIRVType(PType->getElementType(), MIRBuilder,
-                                          AccQual, EmitIR);
+  if (Type *ElemTy = ::getPointeeType(Ty))
+    SpvElementType = getOrCreateSPIRVType(ElemTy, MIRBuilder, AccQual, EmitIR);
   else
     SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder);
 
@@ -1029,7 +1034,11 @@ SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
   // will be added later. For special types it is already added to DT.
   if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() &&
       !isSpecialOpaqueType(Ty)) {
-    if (!isPointerTy(Ty))
+    if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
+        ExtTy && isTypedPointerWrapper(ExtTy))
+      DT.add(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0),
+             &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
+    else if (!isPointerTy(Ty))
       DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
     else if (isTypedPointerTy(Ty))
       DT.add(cast<TypedPointerType>(Ty)->getElementType(),
@@ -1065,7 +1074,11 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
     const Type *Ty, MachineIRBuilder &MIRBuilder,
     SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
   Register Reg;
-  if (!isPointerTy(Ty)) {
+  if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
+      ExtTy && isTypedPointerWrapper(ExtTy)) {
+    Reg = DT.find(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0),
+                  &MIRBuilder.getMF());
+  } else if (!isPointerTy(Ty)) {
     Ty = adjustIntTypeByWidth(Ty);
     Reg = DT.find(Ty, &MIRBuilder.getMF());
   } else if (isTypedPointerTy(Ty)) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index b53ea1f7edf4a0..d5b81bf46c804e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -394,6 +394,7 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const {
       case SPIRV::OpGenericCastToPtr:
         validateAccessChain(STI, MRI, GR, MI);
         break;
+      case SPIRV::OpPtrAccessChain:
       case SPIRV::OpInBoundsPtrAccessChain:
         if (MI.getNumOperands() == 4)
           validateAccessChain(STI, MRI, GR, MI);
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index c5bf02f1a4bca3..2e552172bf2689 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -406,8 +406,10 @@ bool hasBuiltinTypePrefix(StringRef Name) {
 }
 
 bool isSpecialOpaqueType(const Type *Ty) {
-  if (const TargetExtType *EType = dyn_cast<TargetExtType>(Ty))
-    return hasBuiltinTypePrefix(EType->getName());
+  if (const TargetExtType *ExtTy = dyn_cast<TargetExtType>(Ty))
+    return isTypedPointerWrapper(ExtTy)
+               ? false
+               : hasBuiltinTypePrefix(ExtTy->getName());
 
   return false;
 }
@@ -728,4 +730,21 @@ Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR,
                                MIRBuilder);
 }
 
+// Return true if Ty is or nests (via array/function types) a pointer type.
+bool isNestedPointer(const Type *Ty) {
+  if (Ty->isPtrOrPtrVectorTy())
+    return true;
+  if (const auto *ArrTy = dyn_cast<ArrayType>(Ty))
+    return isNestedPointer(ArrTy->getElementType());
+  const auto *FnTy = dyn_cast<FunctionType>(Ty);
+  if (!FnTy)
+    return false;
+  if (isNestedPointer(FnTy->getReturnType()))
+    return true;
+  for (const Type *ParamTy : FnTy->params())
+    if (isNestedPointer(ParamTy))
+      return true;
+  return false;
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 94c4c21b5cbbe6..0965789395abfa 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -274,7 +274,7 @@ inline Type *getTypedPointerWrapper(Type *ElemTy, unsigned AS) {
                             {ElemTy}, {AS});
 }
 
-inline bool isTypedPointerWrapper(TargetExtType *ExtTy) {
+inline bool isTypedPointerWrapper(const TargetExtType *ExtTy) {
   return ExtTy->getName() == TYPED_PTR_TARGET_EXT_NAME &&
          ExtTy->getNumIntParameters() == 1 &&
          ExtTy->getNumTypeParameters() == 1;
@@ -294,13 +294,14 @@ inline Type *applyWrappers(Type *Ty) {
   return Ty;
 }
 
-inline Type *getPointeeType(Type *Ty) {
+inline Type *getPointeeType(const Type *Ty) {
   if (Ty) {
     if (auto PType = dyn_cast<TypedPointerType>(Ty))
       return PType->getElementType();
     else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
       if (isTypedPointerWrapper(ExtTy))
-        return applyWrappers(ExtTy->getTypeParameter(0));
+        return ExtTy->getTypeParameter(0);
+        //return applyWrappers(ExtTy->getTypeParameter(0));
   }
   return nullptr;
 }
@@ -375,5 +376,8 @@ Register createVirtualRegister(const MachineInstr *SpvType,
 Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR,
                                MachineIRBuilder &MIRBuilder);
 
+// Return true if there is an opaque pointer type nested in the argument.
+bool isNestedPointer(const Type *Ty);
+
 } // namespace llvm
 #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll
index 3ebfa1d8c8a9d9..6aeb29df9f7bd4 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll
@@ -1,7 +1,6 @@
 ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s
 ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
 
-; CHECK-DAG: OpCapability Int8
 ; CHECK-DAG: OpCapability FunctionPointersINTEL
 ; CHECK-DAG: OpCapability Int64
 ; CHECK: OpExtension "SPV_INTEL_function_pointers"
@@ -9,19 +8,14 @@
 ; CHECK-DAG: %[[TyVoid:.*]] = OpTypeVoid
 ; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0
 ; CHECK-DAG: %[[TyFun:.*]] = OpTypeFunction %[[TyInt64]] %[[TyInt64]]
-; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0
 ; CHECK-DAG: %[[TyPtrFunCodeSection:.*]] = OpTypePointer CodeSectionINTEL %[[TyFun]]
 ; CHECK-DAG: %[[ConstFunFp:.*]] = OpConstantFunctionPointerINTEL %[[TyPtrFunCodeSection]] %[[DefFunFp:.*]]
 ; CHECK-DAG: %[[TyPtrFun:.*]] = OpTypePointer Function %[[TyFun]]
 ; CHECK-DAG: %[[TyPtrPtrFun:.*]] = OpTypePointer Function %[[TyPtrFun]]
-; CHECK-DAG: %[[TyPtrInt8:.*]] = OpTypePointer Function %[[TyInt8]]
-; CHECK-DAG: %[[TyPtrPtrInt8:.*]] = OpTypePointer Function %[[TyPtrInt8]]
 ; CHECK: OpFunction
-; CHECK: %[[Var:.*]] = OpVariable %[[TyPtrPtrInt8]] Function
-; CHECK: %[[SAddr:.*]] = OpBitcast %[[TyPtrPtrFun]] %[[Var]]
-; CHECK: OpStore %[[SAddr]] %[[ConstFunFp]]
-; CHECK: %[[LAddr:.*]] = OpBitcast %[[TyPtrPtrFun]] %[[Var]]
-; CHECK: %[[FP:.*]] = OpLoad %[[TyPtrFun]] %[[LAddr]]
+; CHECK: %[[Var:.*]] = OpVariable %[[TyPtrPtrFun]] Function
+; CHECK: OpStore %[[Var]] %[[ConstFunFp]]
+; CHECK: %[[FP:.*]] = OpLoad %[[TyPtrFun]] %[[Var]]
 ; CHECK: OpFunctionPointerCallINTEL %[[TyInt64]] %[[FP]] %[[#]]
 ; CHECK: OpFunctionEnd
  
diff --git a/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll b/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll
index afa97ccfc0a69c..a846e1936d7ac5 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll
@@ -1,13 +1,8 @@
 ; The goal of the test case is to ensure that correct types are applied to virtual registers which were
 ; used as return values in call lowering. Pass criterion is that spirv-val considers output valid.
 
-; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV
 ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; CHECK-SPIRV: OpFunction
-; CHECK-SPIRV: %[[#]] = OpPhi %[[#]] %[[#Ptr:]] %[[#]] %[[#]] %[[#]]
-; CHECK-SPIRV: %[[#Ptr]] = OpPtrAccessChain %[[#]] %[[#]] %[[#]]
-
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
 
 %t_half = type { half }
 %t_i17 = type { [17 x i32] }

>From a9d62df2eb86e0d2bec0671b7b87979fb99d5734 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Tue, 26 Nov 2024 12:01:23 -0800
Subject: [PATCH 09/17] improve type inference for GEP

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 159 +++++++++++-------
 llvm/lib/Target/SPIRV/SPIRVUtils.cpp          |   9 +
 llvm/lib/Target/SPIRV/SPIRVUtils.h            |   3 +-
 3 files changed, 112 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 748af7df9e877e..fb1796a5612c26 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -186,8 +186,11 @@ class SPIRVEmitIntrinsics
       CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
       Type *&KnownElemTy, bool IsPostprocessing);
 
-  CallInst *buildSpvPtrcast(Instruction *I, Type *ElemTy);
-  void propagateElemTypeInUses(Instruction *I, Type *ElemTy);
+  CallInst *buildSpvPtrcast(Value *Op, Type *ElemTy);
+  void propagateElemType(Value *Op, Type *ElemTy);
+  void propagateElemTypeRec(Value *Op, Type *PtrElemTy, CallInst *PtrCasted);
+  void propagateElemTypeRec(Value *Op, Type *PtrElemTy, CallInst *PtrCasted,
+                            std::unordered_set<Value *> &Visited);
 
   void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true);
 
@@ -245,10 +248,8 @@ bool expectIgnoredInIRTranslation(const Instruction *I) {
 }
 
 bool allowEmitFakeUse(const Value *Arg) {
-  if (const auto *II = dyn_cast<IntrinsicInst>(Arg))
-    if (Function *F = II->getCalledFunction())
-      if (F->getName().starts_with("llvm.spv."))
-        return false;
+  if (isSpvIntrinsic(Arg))
+    return false;
   if (dyn_cast<AtomicCmpXchgInst>(Arg) || dyn_cast<InsertValueInst>(Arg) ||
       dyn_cast<UndefValue>(Arg))
     return false;
@@ -425,6 +426,79 @@ void SPIRVEmitIntrinsics::updateAssignType(CallInst *AssignCI, Value *Arg,
   GR->addDeducedElementType(Arg, ElemTy);
 }
 
+CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Value *Op, Type *ElemTy) {
+  // Insert after Op's definition (instruction), past the allocas (argument),
+  // or at the first non-PHI/debug/alloca instruction of CurrF's entry block.
+  IRBuilder<> B(Op->getContext());
+  if (auto *DefI = dyn_cast<Instruction>(Op)) {
+    setInsertPointAfterDef(B, DefI);
+  } else if (auto *DefA = dyn_cast<Argument>(Op)) {
+    B.SetInsertPointPastAllocas(DefA->getParent());
+    B.SetCurrentDebugLocation(DebugLoc());
+  } else {
+    B.SetInsertPoint(CurrF->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
+  }
+  Type *PtrTy = Op->getType();
+  Value *ElemTyMD = buildMD(PoisonValue::get(ElemTy));
+  Value *AddrSpace = B.getInt32(getPointerAddressSpace(PtrTy));
+  CallInst *Cast = B.CreateIntrinsic(Intrinsic::spv_ptrcast, {PtrTy, PtrTy},
+                                     {Op, ElemTyMD, AddrSpace});
+  // Register ElemTy as the pointee type of the new cast.
+  buildAssignPtr(B, ElemTy, Cast);
+  return Cast;
+}
+
+void SPIRVEmitIntrinsics::propagateElemType(Value *Op, Type *ElemTy) {
+  // Redirect users of Op to a spv_ptrcast to ElemTy, skipping bitcasts, GEPs
+  // and SPIR-V intrinsics; iterate a copy since the use list is modified.
+  CallInst *Cast = buildSpvPtrcast(Op, ElemTy);
+  SmallVector<User *> Snapshot(Op->users());
+  for (User *U : Snapshot)
+    if (!isa<BitCastInst, GetElementPtrInst>(U) && !isSpvIntrinsic(U))
+      U->replaceUsesOfWith(Op, Cast);
+}
+
+void SPIRVEmitIntrinsics::propagateElemTypeRec(Value *Op, Type *PtrElemTy,
+                                               CallInst *PtrCasted) {
+  std::unordered_set<Value *> Seen;
+  // Propagate only when the pointee type is, or nests, a pointer.
+  if (isNestedPointer(PtrElemTy))
+    propagateElemTypeRec(Op, PtrElemTy, PtrCasted, Seen);
+}
+
+void SPIRVEmitIntrinsics::propagateElemTypeRec(
+    Value *Op, Type *PtrElemTy, CallInst *PtrCasted,
+    std::unordered_set<Value *> &Visited) {
+  if (!Visited.insert(Op).second)
+    return;
+  SmallVector<User *> Snapshot(Op->users());
+  for (User *U : Snapshot) {
+    if (isa<BitCastInst>(U) || isSpvIntrinsic(U))
+      continue;
+    auto *GEP = dyn_cast<GetElementPtrInst>(U);
+    if (!GEP) {
+      // Any other user is redirected to PtrCasted.
+      U->replaceUsesOfWith(Op, PtrCasted);
+      continue;
+    }
+    CallInst *AssignCI = GR->findAssignPtrTypeInstr(GEP);
+    if (!AssignCI || GEP->getPointerOperand() != Op)
+      continue;
+    Type *OldGEPTy = GR->findDeducedElementType(GEP);
+    assert(OldGEPTy && "Expected valid element type");
+    // Re-evaluate the GEP type from the new pointee type and record it.
+    Type *NewGEPTy = PtrElemTy;
+    for (Use &Idx : drop_begin(GEP->indices()))
+      NewGEPTy = GetElementPtrInst::getTypeAtIndex(NewGEPTy, Idx.get());
+    assert(NewGEPTy && "Expected valid GEP indices");
+    updateAssignType(AssignCI, GEP, PoisonValue::get(NewGEPTy));
+    // Recurse into the GEP's users with a cast of it to its previous type.
+    if (isNestedPointer(NewGEPTy))
+      propagateElemTypeRec(GEP, NewGEPTy, buildSpvPtrcast(GEP, OldGEPTy),
+                           Visited);
+  }
+}
+
 // Set element pointer type to the given value of ValueTy and tries to
 // specify this type further (recursively) by Operand value, if needed.
 
@@ -530,7 +604,9 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
     maybeAssignPtrType(Ty, I, Ref->getAllocatedType(), UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
     Ty = Ref->getResultElementType();
-    if (isNestedPointer(Ty)) {
+    if (isNestedPointer(Ref->getSourceElementType())) {
+      Type *PtrElemTy = GR->findDeducedElementType(Ref->getPointerOperand());
+      Ty = PtrElemTy ? PtrElemTy : Ref->getSourceElementType();
       for (Use &U : drop_begin(Ref->indices()))
         Ty = GetElementPtrInst::getTypeAtIndex(Ty, U.get());
     }
@@ -936,7 +1012,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
           if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(CI)) {
             if (Type *PrevElemTy = GR->findDeducedElementType(CI)) {
               updateAssignType(AssignCI, CI, PoisonValue::get(OpElemTy));
-              propagateElemTypeInUses(CI, PrevElemTy);
+              propagateElemType(CI, PrevElemTy);
             }
           }
         }
@@ -1012,25 +1088,13 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
         GR->addAssignPtrTypeInstr(Op, CI);
       } else {
         updateAssignType(AssignCI, Op, OpTyVal);
+        propagateElemTypeRec(
+            Op, KnownElemTy,
+            buildSpvPtrcast(Op, GR->findDeducedElementType(Op)));
       }
     } else {
       eraseTodoType(Op);
-      if (auto *OpI = dyn_cast<Instruction>(Op)) {
-        // spv_ptrcast's argument Op denotes an instruction that generates
-        // a value, and we may use getInsertionPointAfterDef()
-        setInsertPointAfterDef(B, OpI);
-      } else if (auto *OpA = dyn_cast<Argument>(Op)) {
-        B.SetInsertPointPastAllocas(OpA->getParent());
-        B.SetCurrentDebugLocation(DebugLoc());
-      } else {
-        B.SetInsertPoint(CurrF->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
-      }
-      SmallVector<Type *, 2> Types = {OpTy, OpTy};
-      SmallVector<Value *, 2> Args = {Op, buildMD(OpTyVal),
-                                      B.getInt32(getPointerAddressSpace(OpTy))};
-      CallInst *PtrCastI =
-          B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
-      buildAssignPtr(B, KnownElemTy, PtrCastI);
+      CallInst *PtrCastI = buildSpvPtrcast(Op, KnownElemTy);
       if (OpIt.second == std::numeric_limits<unsigned>::max())
         dyn_cast<CallInst>(I)->setCalledOperand(PtrCastI);
       else
@@ -2191,35 +2255,6 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   return true;
 }
 
-CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Instruction *I, Type *ElemTy) {
-  IRBuilder<> B(I->getContext());
-  B.SetInsertPoint(*I->getInsertionPointAfterDef());
-  B.SetCurrentDebugLocation(I->getDebugLoc());
-  Type *OpTy = I->getType();
-  SmallVector<Type *, 2> Types = {OpTy, OpTy};
-  SmallVector<Value *, 2> Args = {I, buildMD(PoisonValue::get(ElemTy)),
-                                  B.getInt32(getPointerAddressSpace(OpTy))};
-  CallInst *PtrCasted =
-      B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
-  buildAssignPtr(B, ElemTy, PtrCasted);
-  return PtrCasted;
-}
-
-void SPIRVEmitIntrinsics::propagateElemTypeInUses(Instruction *I,
-                                                  Type *ElemTy) {
-  CallInst *PtrCasted = buildSpvPtrcast(I, ElemTy);
-  SmallVector<User *> Users(I->users());
-  for (auto *U : Users) {
-    if (isa<BitCastInst>(U) || isa<GetElementPtrInst>(U))
-      continue;
-    if (const auto *II = dyn_cast<IntrinsicInst>(U))
-      if (Function *F = II->getCalledFunction())
-        if (F->getName().starts_with("llvm.spv."))
-          continue;
-    U->replaceUsesOfWith(I, PtrCasted);
-  }
-}
-
 // Try to deduce a better type for pointers to untyped ptr.
 bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
   if (!GR || TodoTypeSz == 0)
@@ -2228,7 +2263,7 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
   unsigned SzTodo = TodoTypeSz;
   DenseMap<Value *, SmallPtrSet<Value *, 4>> ToProcess;
   for (auto [Op, Enabled] : TodoType) {
-    if (!Enabled)
+    if (!Enabled || isa<GetElementPtrInst>(Op))
       continue;
     CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op);
     Type *KnownTy = GR->findDeducedElementType(Op);
@@ -2241,14 +2276,22 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
       std::unordered_set<Value *> Visited;
       if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) {
         if (ElemTy != KnownTy) {
-          if (mayUpdateOpType(CI)) {
-            updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
-            propagateElemTypeInUses(CI, KnownTy);
+          if (isa<CallInst>(Op)) {
+            propagateElemType(CI, ElemTy);
           } else {
-            propagateElemTypeInUses(CI, ElemTy);
+            updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
+            propagateElemTypeRec(CI, ElemTy, buildSpvPtrcast(CI, KnownTy));
           }
           eraseTodoType(Op);
           continue;
+          /*
+          if (mayUpdateOpType(CI)) {
+            updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
+            propagateElemType(CI, KnownTy);
+          } else {
+            propagateElemType(CI, ElemTy);
+          }
+          */
         }
       }
     }
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 2e552172bf2689..1e9b1af130baa1 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Demangle/Demangle.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsSPIRV.h"
 #include <queue>
 #include <vector>
@@ -747,4 +748,12 @@ bool isNestedPointer(const Type *Ty) {
   return false;
 }
 
+bool isSpvIntrinsic(const Value *Arg) {
+  if (const auto *II = dyn_cast<IntrinsicInst>(Arg))
+    if (Function *F = II->getCalledFunction())
+      if (F->getName().starts_with("llvm.spv."))
+        return true;
+  return false;
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 0965789395abfa..368572e311269c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -196,6 +196,8 @@ uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI);
 
 // Check if MI is a SPIR-V specific intrinsic call.
 bool isSpvIntrinsic(const MachineInstr &MI, Intrinsic::ID IntrinsicID);
+// Check if it's a SPIR-V specific intrinsic call.
+bool isSpvIntrinsic(const Value *Arg);
 
 // Get type of i-th operand of the metadata node.
 Type *getMDOperandAsType(const MDNode *N, unsigned I);
@@ -301,7 +303,6 @@ inline Type *getPointeeType(const Type *Ty) {
     else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
       if (isTypedPointerWrapper(ExtTy))
         return ExtTy->getTypeParameter(0);
-        //return applyWrappers(ExtTy->getTypeParameter(0));
   }
   return nullptr;
 }

>From 3f7005fed053b814c2948779ac7678112401b5a6 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Tue, 26 Nov 2024 13:16:40 -0800
Subject: [PATCH 10/17] fix referring to an instruction in another function

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 43 +++++++++++--------
 .../SPIRV/pointers/phi-valid-operand-types.ll |  5 +--
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index fb1796a5612c26..b71655f33045ec 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -186,10 +186,10 @@ class SPIRVEmitIntrinsics
       CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
       Type *&KnownElemTy, bool IsPostprocessing);
 
-  CallInst *buildSpvPtrcast(Value *Op, Type *ElemTy);
+  CallInst *buildSpvPtrcast(Function *F, Value *Op, Type *ElemTy);
   void propagateElemType(Value *Op, Type *ElemTy);
-  void propagateElemTypeRec(Value *Op, Type *PtrElemTy, CallInst *PtrCasted);
-  void propagateElemTypeRec(Value *Op, Type *PtrElemTy, CallInst *PtrCasted,
+  void propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy);
+  void propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy,
                             std::unordered_set<Value *> &Visited);
 
   void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true);
@@ -426,7 +426,8 @@ void SPIRVEmitIntrinsics::updateAssignType(CallInst *AssignCI, Value *Arg,
   GR->addDeducedElementType(Arg, ElemTy);
 }
 
-CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Value *Op, Type *ElemTy) {
+CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Function *F, Value *Op,
+                                               Type *ElemTy) {
   IRBuilder<> B(Op->getContext());
   if (auto *OpI = dyn_cast<Instruction>(Op)) {
     // spv_ptrcast's argument Op denotes an instruction that generates
@@ -436,7 +437,7 @@ CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Value *Op, Type *ElemTy) {
     B.SetInsertPointPastAllocas(OpA->getParent());
     B.SetCurrentDebugLocation(DebugLoc());
   } else {
-    B.SetInsertPoint(CurrF->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
+    B.SetInsertPoint(F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
   }
   Type *OpTy = Op->getType();
   SmallVector<Type *, 2> Types = {OpTy, OpTy};
@@ -449,30 +450,36 @@ CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Value *Op, Type *ElemTy) {
 }
 
 void SPIRVEmitIntrinsics::propagateElemType(Value *Op, Type *ElemTy) {
-  CallInst *PtrCasted = buildSpvPtrcast(Op, ElemTy);
+  // CallInst *PtrCasted = buildSpvPtrcast(Op, ElemTy);
   SmallVector<User *> Users(Op->users());
   for (auto *U : Users) {
+    if (!isa<Instruction>(U))
+      continue;
     if (isa<BitCastInst>(U) || isa<GetElementPtrInst>(U) || isSpvIntrinsic(U))
       continue;
-    U->replaceUsesOfWith(Op, PtrCasted);
+    U->replaceUsesOfWith(
+        Op, buildSpvPtrcast(dyn_cast<Instruction>(U)->getParent()->getParent(),
+                            Op, ElemTy));
   }
 }
 
 void SPIRVEmitIntrinsics::propagateElemTypeRec(Value *Op, Type *PtrElemTy,
-                                               CallInst *PtrCasted) {
+                                               Type *CastElemTy) {
   if (!isNestedPointer(PtrElemTy))
     return;
   std::unordered_set<Value *> Visited;
-  propagateElemTypeRec(Op, PtrElemTy, PtrCasted, Visited);
+  propagateElemTypeRec(Op, PtrElemTy, CastElemTy, Visited);
 }
 
 void SPIRVEmitIntrinsics::propagateElemTypeRec(
-    Value *Op, Type *PtrElemTy, CallInst *PtrCasted,
+    Value *Op, Type *PtrElemTy, Type *CastElemTy,
     std::unordered_set<Value *> &Visited) {
   if (!Visited.insert(Op).second)
     return;
   SmallVector<User *> Users(Op->users());
   for (auto *U : Users) {
+    if (!isa<Instruction>(U))
+      continue;
     if (isa<BitCastInst>(U) || isSpvIntrinsic(U))
       continue;
     if (auto *Ref = dyn_cast<GetElementPtrInst>(U)) {
@@ -490,12 +497,13 @@ void SPIRVEmitIntrinsics::propagateElemTypeRec(
         updateAssignType(AssignCI, Ref, PoisonValue::get(NewElemTy));
         // recursively propagate change
         if (isNestedPointer(NewElemTy))
-          propagateElemTypeRec(Ref, NewElemTy, buildSpvPtrcast(Ref, PrevElemTy),
-                               Visited);
+          propagateElemTypeRec(Ref, NewElemTy, PrevElemTy, Visited);
       }
       continue;
     }
-    U->replaceUsesOfWith(Op, PtrCasted);
+    U->replaceUsesOfWith(
+        Op, buildSpvPtrcast(dyn_cast<Instruction>(U)->getParent()->getParent(),
+                            Op, CastElemTy));
   }
 }
 
@@ -1088,13 +1096,12 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
         GR->addAssignPtrTypeInstr(Op, CI);
       } else {
         updateAssignType(AssignCI, Op, OpTyVal);
-        propagateElemTypeRec(
-            Op, KnownElemTy,
-            buildSpvPtrcast(Op, GR->findDeducedElementType(Op)));
+        propagateElemTypeRec(Op, KnownElemTy, GR->findDeducedElementType(Op));
       }
     } else {
       eraseTodoType(Op);
-      CallInst *PtrCastI = buildSpvPtrcast(Op, KnownElemTy);
+      CallInst *PtrCastI =
+          buildSpvPtrcast(I->getParent()->getParent(), Op, KnownElemTy);
       if (OpIt.second == std::numeric_limits<unsigned>::max())
         dyn_cast<CallInst>(I)->setCalledOperand(PtrCastI);
       else
@@ -2280,7 +2287,7 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
             propagateElemType(CI, ElemTy);
           } else {
             updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
-            propagateElemTypeRec(CI, ElemTy, buildSpvPtrcast(CI, KnownTy));
+            propagateElemTypeRec(CI, ElemTy, KnownTy);
           }
           eraseTodoType(Op);
           continue;
diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
index c2db50e7aa394d..a55c4da14d1521 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
@@ -7,9 +7,8 @@
 ; CHECK-DAG: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]]
 ; CHECK: %[[#R1:]] = OpFunctionCall %[[#PtrChar]] %[[#]]
 ; CHECK: %[[#R2:]] = OpFunctionCall %[[#PtrInt]] %[[#]]
-; CHECK: %[[#Casted:]] = OpBitcast %[[#PtrChar]] %[[#R2]]
-; CHECK: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#Casted]] %[[#]]
-; CHECK: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#Casted]] %[[#]]
+; CHECK-2: %[[#]] = OpBitcast %[[#PtrChar]] %[[#R2]]
+; CHECK-2: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#]] %[[#]]
 
 define ptr @foo(i1 %arg) {
 entry:

>From 5923a275f7c6b1f755406cac63e2c2e94a6a20fe Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Tue, 26 Nov 2024 14:18:22 -0800
Subject: [PATCH 11/17] improve type inference

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp        | 12 ++++++++----
 .../SPIRV/pointers/phi-valid-operand-types.ll        |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index b71655f33045ec..e34534fcab3c11 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -450,7 +450,6 @@ CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Function *F, Value *Op,
 }
 
 void SPIRVEmitIntrinsics::propagateElemType(Value *Op, Type *ElemTy) {
-  // CallInst *PtrCasted = buildSpvPtrcast(Op, ElemTy);
   SmallVector<User *> Users(Op->users());
   for (auto *U : Users) {
     if (!isa<Instruction>(U))
@@ -1095,8 +1094,9 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
                             {B.getInt32(getPointerAddressSpace(OpTy))}, B);
         GR->addAssignPtrTypeInstr(Op, CI);
       } else {
+        Type *PrevElemTy = GR->findDeducedElementType(Op);
         updateAssignType(AssignCI, Op, OpTyVal);
-        propagateElemTypeRec(Op, KnownElemTy, GR->findDeducedElementType(Op));
+        propagateElemTypeRec(Op, KnownElemTy, PrevElemTy);
       }
     } else {
       eraseTodoType(Op);
@@ -1412,10 +1412,13 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
       if (mayUpdateOpType(Pointer)) {
         //  If this wouldn't be the first spv_ptrcast but existing type info is
         //  uncomplete, update spv_assign_ptr_type arguments.
-        if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer))
+        if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer)) {
+          Type *PrevElemTy = GR->findDeducedElementType(Pointer);
           updateAssignType(AssignCI, Pointer, ExpectedElementVal);
-        else
+          propagateElemTypeRec(Pointer, ExpectedElementType, PrevElemTy);
+        } else {
           buildAssignPtr(B, ExpectedElementType, Pointer);
+        }
         return;
       }
     }
@@ -1428,6 +1431,7 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
   I->setOperand(OperandToReplace, PtrCastI);
   // We need to set up a pointee type for the newly created spv_ptrcast.
   buildAssignPtr(B, ExpectedElementType, PtrCastI);
+  //propagateElemType(Pointer, ExpectedElementType);
 }
 
 void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
index a55c4da14d1521..f4c8c5a79bcb77 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll
@@ -7,8 +7,8 @@
 ; CHECK-DAG: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]]
 ; CHECK: %[[#R1:]] = OpFunctionCall %[[#PtrChar]] %[[#]]
 ; CHECK: %[[#R2:]] = OpFunctionCall %[[#PtrInt]] %[[#]]
-; CHECK-2: %[[#]] = OpBitcast %[[#PtrChar]] %[[#R2]]
-; CHECK-2: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#]] %[[#]]
+; CHECK: %[[#Casted:]] = OpBitcast %[[#PtrChar]] %[[#R2]]
+; CHECK: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#Casted]] %[[#]]
 
 define ptr @foo(i1 %arg) {
 entry:

>From 7c53f10a9fc9dca2296a8860812fa512ec8c19fa Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Tue, 26 Nov 2024 14:49:45 -0800
Subject: [PATCH 12/17] clean comments; add TODOs

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 154 +-----------------
 1 file changed, 9 insertions(+), 145 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index e34534fcab3c11..65acd86d1f41df 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -62,10 +62,6 @@ inline MetadataAsValue *buildMD(Value *Arg) {
       Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(Arg)));
 }
 
-inline bool mayUpdateOpType(Value *Op) {
-  return !(isa<CallInst>(Op) || isa<GetElementPtrInst>(Op));
-}
-
 class SPIRVEmitIntrinsics
     : public ModulePass,
       public InstVisitor<SPIRVEmitIntrinsics, Instruction *> {
@@ -101,11 +97,6 @@ class SPIRVEmitIntrinsics
     auto It = TodoType.find(Op);
     return It != TodoType.end() && It->second;
   }
-  // bool mayUpdateOpType(Value *Op) {
-  //   if (isa<CallInst>(Op) || isa<GetElementPtrInst>(Op))
-  //     return false;
-  //   return isTodoType(Op);
-  // }
 
   // well known result types of builtins
   enum WellKnownTypes { Event };
@@ -408,7 +399,6 @@ void SPIRVEmitIntrinsics::buildAssignPtr(IRBuilder<> &B, Type *ElemTy,
     GR->addDeducedElementType(Arg, ElemTy);
     GR->addAssignPtrTypeInstr(Arg, AssignPtrTyCI);
   } else {
-    assert(mayUpdateOpType(Arg) || "Forbidden to update assigned type");
     updateAssignType(AssignPtrTyCI, Arg, OfType);
   }
 }
@@ -695,12 +685,6 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
                                      Visited, UnknownElemTypeI8);
       else if (Type *KnownRetTy = GR->findDeducedElementType(CalledF))
         Ty = KnownRetTy;
-      /*
-            else {
-              Ty = IntegerType::getInt8Ty(I->getContext());
-              insertTodoType(I);
-            }
-      */
     }
   }
 
@@ -950,8 +934,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     Uncomplete = isTodoType(I);
     Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
-    // TODO: ensure that Ref->getPointerOperand() has
-    // Ref->getSourceElementType()
+    // TODO: ensure that getPointerOperand() and GEP result type are consistent
     if (GR->findDeducedElementType(Ref->getPointerOperand()))
       return;
     KnownElemTy = Ref->getSourceElementType();
@@ -967,11 +950,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     Ops.push_back(std::make_pair(Ref->getPointerOperand(),
                                  LoadInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
-    // if (IsKernelArgInt8(Ref->getParent()->getParent(), Ref))
-    //   return;
     if (!(KnownElemTy =
-              reconstructType(Ref->getValueOperand(),
-                              false /*UnknownElemTypeI8*/, IsPostprocessing)))
+              reconstructType(Ref->getValueOperand(), false, IsPostprocessing)))
       return;
     Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
     if (PointeeTy && !isUntypedPointerTy(PointeeTy))
@@ -1076,8 +1056,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
       continue;
     Value *OpTyVal = PoisonValue::get(KnownElemTy);
     Type *OpTy = Op->getType();
-    if ( // mayUpdateOpType(Op) &&
-        (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op))) {
+    if (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)) {
       GR->addDeducedElementType(Op, KnownElemTy);
       // check if KnownElemTy is complete
       if (!Uncomplete)
@@ -1344,7 +1323,6 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
 
   // Our previous guess about the type seems to be wrong, let's update
   // inferred type according to a new, more precise type information.
-  assert(mayUpdateOpType(V) || "Forbidden to update assigned type");
   updateAssignType(AssignCI, V, PoisonValue::get(AssignedType));
 }
 
@@ -1409,7 +1387,7 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
       return;
     } else if (isTodoType(Pointer)) {
       eraseTodoType(Pointer);
-      if (mayUpdateOpType(Pointer)) {
+      if (!isa<CallInst>(Pointer) && !isa<GetElementPtrInst>(Pointer)) {
         //  If this wouldn't be the first spv_ptrcast but existing type info is
         //  uncomplete, update spv_assign_ptr_type arguments.
         if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer)) {
@@ -1431,7 +1409,6 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
   I->setOperand(OperandToReplace, PtrCastI);
   // We need to set up a pointee type for the newly created spv_ptrcast.
   buildAssignPtr(B, ExpectedElementType, PtrCastI);
-  //propagateElemType(Pointer, ExpectedElementType);
 }
 
 void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
@@ -1455,8 +1432,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
   } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     Value *Pointer = LI->getPointerOperand();
     Type *OpTy = LI->getType();
-    if (auto *PtrTy = dyn_cast<PointerType>(
-            OpTy)) { // TODO: isNestedPointer or rather getNestedPointerAS()
+    if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
+      // TODO: isNestedPointer() instead of dyn_cast<PointerType>
       if (Type *ElemTy = GR->findDeducedElementType(LI)) {
         OpTy = getTypedPointerWrapper(ElemTy, PtrTy->getAddressSpace());
       } else {
@@ -1468,24 +1445,6 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
     }
     return replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
   } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
-    /*
-        Value *Pointer = GEPI->getPointerOperand();
-        Type *OpTy = GEPI->getSourceElementType();
-        if (auto *PtrTy = dyn_cast<PointerType>(
-                OpTy)) { // TODO: isNestedPointer or rather getNestedPointerAS()
-          if (Type *ElemTy = GR->findDeducedElementType(Pointer)) {
-            return;
-          } else {
-            if (Type *ElemTy = deduceElementTypeHelper(Pointer, false))
-              OpTy = ElemTy;
-            else
-              insertTodoType(Pointer);
-          }
-        }
-        //::getPointeeType(OpTy)
-        // isNestedPointer
-        return replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
-    */
     Value *Pointer = GEPI->getPointerOperand();
     Type *OpTy = GEPI->getSourceElementType();
     replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
@@ -1494,7 +1453,7 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
     return;
   }
 
-  // TODO: review and maybe merge with existing logics the following ...:
+  // TODO: review and merge with the existing logic:
   // Handle calls to builtins (non-intrinsics):
   CallInst *CI = dyn_cast<CallInst>(I);
   if (!CI || CI->isIndirectCall() || CI->isInlineAsm() ||
@@ -1796,53 +1755,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
         }
       }
     }
-  } /*else if (auto *Ref = dyn_cast<StoreInst>(I)) {
-    if (!IsKernelArgInt8(CurrF, Ref)) {
-      Type *ElemTy = reconstructType(Ref->getValueOperand(), false, false);
-      if (ElemTy) {
-        setInsertPointAfterDef(B, I);
-        buildAssignPtr(B, ElemTy, Ref->getPointerOperand());
-      }
-    }
-  } */ /*else if (auto *Ref = dyn_cast<StoreInst>(I)) {
-    if (!IsKernelArgInt8(CurrF, Ref)) {
-      Type *ElemTy =
-          reconstructType(Ref->getValueOperand(), true,
-                          false);
-      assert(ElemTy);
-      setInsertPointAfterDef(B, I);
-      buildAssignPtr(B, ElemTy, Ref->getPointerOperand());
-    }
-  } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
-    if (IsKernelArgInt8(CurrF, Ref)) {
-      // TODO: rework this outdated call
-      replacePointerOperandWithPtrCast(
-          I, Ref->getPointerOperand(),
-          IntegerType::getInt8Ty(CurrF->getContext()), 0, B);
-    } else {
-      Type *ElemTy = reconstructType(GR, Ref->getValueOperand(), true);
-      assert(ElemTy);
-      setInsertPointAfterDef(B, I);
-      buildAssignPtr(B, ElemTy, Ref->getPointerOperand());
-    }
-  } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
-    Value *Op = Ref->getPointerOperand();
-    Type *ElemTy = Ref->getSourceElementType();
-    if (isUntypedPointerTy(ElemTy))
-      insertTodoType(Op);
-    setInsertPointAfterDef(B, I);
-    buildAssignPtr(B, ElemTy, Op);
-    // TODO: rework this outdated call
-    // replacePointerOperandWithPtrCast(I, Op,
-    //                                 ElemTy, 0, B);
-    // Ty = Ref->getResultElementType();
-    //  TODO: GR->findDeduceElementType()
-    //   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
-    //     return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(),
-    //                                             GEPI->getSourceElementType(),
-    //                                             0, B);
-    //   }
-  }*/
+  }
 
   Type *Ty = I->getType();
   if (!IsKnown && !Ty->isVoidTy() && !isPointerTy(Ty) && requireAssignType(I)) {
@@ -2037,11 +1950,6 @@ void SPIRVEmitIntrinsics::processParamTypesByFunHeader(Function *F,
       buildAssignPtr(B, ElemTy, Arg);
       continue;
     }
-    // ElemTy = deduceFunParamElementType(F, OpIdx);
-    // if (ElemTy) {
-    //   buildAssignPtr(B, ElemTy, Arg);
-    //   continue;
-    // }
     if (HaveFunPtrs) {
       for (User *U : Arg->users()) {
         CallInst *CI = dyn_cast<CallInst>(U);
@@ -2057,33 +1965,6 @@ void SPIRVEmitIntrinsics::processParamTypesByFunHeader(Function *F,
         }
       }
     }
-    /*
-        for (User *U : Arg->users()) {
-          if (CallInst *CI = dyn_cast<CallInst>(U)) {
-            if (!CI->isIndirectCall())
-              deduceOperandElementTypeCalledFunction(CI, Ops, ElemTy);
-            else if (HaveFunPtrs)
-              deduceOperandElementTypeFunctionPointer(CI, Ops, ElemTy, false);
-          }
-        }
-    */
-    /*
-        if (HaveFunPtrs) {
-          for (User *U : Arg->users()) {
-            CallInst *CI = dyn_cast<CallInst>(U);
-            if (CI && !isa<IntrinsicInst>(CI) && CI->isIndirectCall() &&
-                CI->getCalledOperand() == Arg &&
-                CI->getParent()->getParent() == CurrF) {
-              SmallVector<std::pair<Value *, unsigned>> Ops;
-              deduceOperandElementTypeFunctionPointer(CI, Ops, ElemTy, false);
-              if (ElemTy) {
-                buildAssignPtr(B, ElemTy, Arg);
-                break;
-              }
-            }
-          }
-        }
-    */
   }
 }
 
@@ -2295,14 +2176,6 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
           }
           eraseTodoType(Op);
           continue;
-          /*
-          if (mayUpdateOpType(CI)) {
-            updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
-            propagateElemType(CI, KnownTy);
-          } else {
-            propagateElemType(CI, ElemTy);
-          }
-          */
         }
       }
     }
@@ -2355,16 +2228,7 @@ bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
 
   if (HaveFunPtrs)
     Changed |= processFunctionPointers(M);
-  /*
-    TodoType.clear();
-    TodoTypeSz = 0;
-    std::unordered_set<Value *> Visited;
-    for (auto &F : M) {
-      CurrF = &F;
-      for (auto &I : instructions(F))
-        deduceOperandElementType(&I, nullptr, true);
-    }
-  */
+
   return Changed;
 }
 

>From 6d0c3111eeaeb99169bcac682e5e46e9750c5483 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 27 Nov 2024 06:52:24 -0800
Subject: [PATCH 13/17] improve GEP support

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 161 +++++++++++++-----
 llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp   |  10 +-
 llvm/lib/Target/SPIRV/SPIRVUtils.h            |   7 +
 3 files changed, 133 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 65acd86d1f41df..f828e5e81aa248 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -17,6 +17,7 @@
 #include "SPIRVSubtarget.h"
 #include "SPIRVTargetMachine.h"
 #include "SPIRVUtils.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/InstVisitor.h"
@@ -80,7 +81,8 @@ class SPIRVEmitIntrinsics
   unsigned TodoTypeSz = 0;
   DenseMap<Value *, bool> TodoType;
   void insertTodoType(Value *Op) {
-    if (CanTodoType) {
+    // TODO: also skip insertion when isa<CallInst>(Op)
+    if (CanTodoType && !isa<GetElementPtrInst>(Op)) {
       auto It = TodoType.try_emplace(Op, true);
       if (It.second)
         ++TodoTypeSz;
@@ -94,9 +96,14 @@ class SPIRVEmitIntrinsics
     }
   }
   bool isTodoType(Value *Op) {
+    if (isa<GetElementPtrInst>(Op))
+      return false;
     auto It = TodoType.find(Op);
     return It != TodoType.end() && It->second;
   }
+  // Instructions already visited by deduceOperandElementType(), i.e., those
+  // whose operand types have been validated against the instruction.
+  std::unordered_set<Instruction *> TypeValidated;
 
   // well known result types of builtins
   enum WellKnownTypes { Event };
@@ -178,10 +185,17 @@ class SPIRVEmitIntrinsics
       Type *&KnownElemTy, bool IsPostprocessing);
 
   CallInst *buildSpvPtrcast(Function *F, Value *Op, Type *ElemTy);
-  void propagateElemType(Value *Op, Type *ElemTy);
-  void propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy);
+  void replaceUsesOfWithSpvPtrcast(Value *Op, Type *ElemTy, Instruction *I,
+                                   DenseMap<Function *, CallInst *> Ptrcasts);
+  void propagateElemType(Value *Op, Type *ElemTy,
+                         DenseSet<std::pair<Value *, Value *>> &VisitedSubst);
+  void
+  propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy,
+                       DenseSet<std::pair<Value *, Value *>> &VisitedSubst);
   void propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy,
-                            std::unordered_set<Value *> &Visited);
+                            DenseSet<std::pair<Value *, Value *>> &VisitedSubst,
+                            std::unordered_set<Value *> &Visited,
+                            DenseMap<Function *, CallInst *> Ptrcasts);
 
   void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true);
 
@@ -439,38 +453,63 @@ CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Function *F, Value *Op,
   return PtrCasted;
 }
 
-void SPIRVEmitIntrinsics::propagateElemType(Value *Op, Type *ElemTy) {
+void SPIRVEmitIntrinsics::replaceUsesOfWithSpvPtrcast(
+    Value *Op, Type *ElemTy, Instruction *I,
+    DenseMap<Function *, CallInst *> Ptrcasts) {
+  Function *F = I->getParent()->getParent();
+  CallInst *PtrCastedI = nullptr;
+  auto It = Ptrcasts.find(F);
+  if (It == Ptrcasts.end()) {
+    PtrCastedI = buildSpvPtrcast(F, Op, ElemTy);
+    Ptrcasts[F] = PtrCastedI;
+  } else {
+    PtrCastedI = It->second;
+  }
+  I->replaceUsesOfWith(Op, PtrCastedI);
+}
+
+void SPIRVEmitIntrinsics::propagateElemType(
+    Value *Op, Type *ElemTy,
+    DenseSet<std::pair<Value *, Value *>> &VisitedSubst) {
+  DenseMap<Function *, CallInst *> Ptrcasts;
   SmallVector<User *> Users(Op->users());
   for (auto *U : Users) {
-    if (!isa<Instruction>(U))
+    if (!isa<Instruction>(U) || isa<BitCastInst>(U) || isSpvIntrinsic(U))
       continue;
-    if (isa<BitCastInst>(U) || isa<GetElementPtrInst>(U) || isSpvIntrinsic(U))
+    if (!VisitedSubst.insert(std::make_pair(U, Op)).second)
       continue;
-    U->replaceUsesOfWith(
-        Op, buildSpvPtrcast(dyn_cast<Instruction>(U)->getParent()->getParent(),
-                            Op, ElemTy));
+    Instruction *UI = dyn_cast<Instruction>(U);
+    // If the instruction was validated already, we need to keep it valid by
+    // keeping current Op type.
+    if (isa<GetElementPtrInst>(UI) ||
+        TypeValidated.find(UI) != TypeValidated.end())
+      replaceUsesOfWithSpvPtrcast(Op, ElemTy, UI, Ptrcasts);
   }
 }
 
-void SPIRVEmitIntrinsics::propagateElemTypeRec(Value *Op, Type *PtrElemTy,
-                                               Type *CastElemTy) {
-  if (!isNestedPointer(PtrElemTy))
-    return;
+void SPIRVEmitIntrinsics::propagateElemTypeRec(
+    Value *Op, Type *PtrElemTy, Type *CastElemTy,
+    DenseSet<std::pair<Value *, Value *>> &VisitedSubst) {
   std::unordered_set<Value *> Visited;
-  propagateElemTypeRec(Op, PtrElemTy, CastElemTy, Visited);
+  DenseMap<Function *, CallInst *> Ptrcasts;
+  propagateElemTypeRec(Op, PtrElemTy, CastElemTy, VisitedSubst, Visited,
+                       Ptrcasts);
 }
 
 void SPIRVEmitIntrinsics::propagateElemTypeRec(
     Value *Op, Type *PtrElemTy, Type *CastElemTy,
-    std::unordered_set<Value *> &Visited) {
+    DenseSet<std::pair<Value *, Value *>> &VisitedSubst,
+    std::unordered_set<Value *> &Visited,
+    DenseMap<Function *, CallInst *> Ptrcasts) {
   if (!Visited.insert(Op).second)
     return;
   SmallVector<User *> Users(Op->users());
   for (auto *U : Users) {
-    if (!isa<Instruction>(U))
+    if (!isa<Instruction>(U) || isa<BitCastInst>(U) || isSpvIntrinsic(U))
       continue;
-    if (isa<BitCastInst>(U) || isSpvIntrinsic(U))
+    if (!VisitedSubst.insert(std::make_pair(U, Op)).second)
       continue;
+    /*
     if (auto *Ref = dyn_cast<GetElementPtrInst>(U)) {
       CallInst *AssignCI = GR->findAssignPtrTypeInstr(Ref);
       if (AssignCI && Ref->getPointerOperand() == Op) {
@@ -485,14 +524,18 @@ void SPIRVEmitIntrinsics::propagateElemTypeRec(
         assert(NewElemTy && "Expected valid GEP indices");
         updateAssignType(AssignCI, Ref, PoisonValue::get(NewElemTy));
         // recursively propagate change
-        if (isNestedPointer(NewElemTy))
-          propagateElemTypeRec(Ref, NewElemTy, PrevElemTy, Visited);
+        propagateElemTypeRec(Ref, NewElemTy, PrevElemTy, VisitedSubst, Visited,
+                             Ptrcasts);
       }
       continue;
     }
-    U->replaceUsesOfWith(
-        Op, buildSpvPtrcast(dyn_cast<Instruction>(U)->getParent()->getParent(),
-                            Op, CastElemTy));
+    */
+    Instruction *UI = dyn_cast<Instruction>(U);
+    // If the instruction was validated already, we need to keep it valid by
+    // keeping current Op type.
+    if (isa<GetElementPtrInst>(UI) ||
+        TypeValidated.find(UI) != TypeValidated.end())
+      replaceUsesOfWithSpvPtrcast(Op, CastElemTy, UI, Ptrcasts);
   }
 }
 
@@ -600,13 +643,34 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
   if (auto *Ref = dyn_cast<AllocaInst>(I)) {
     maybeAssignPtrType(Ty, I, Ref->getAllocatedType(), UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
-    Ty = Ref->getResultElementType();
+    // TODO: not sure if GetElementPtrInst::getTypeAtIndex() does anything
+    // useful here
+    if (isNestedPointer(Ref->getSourceElementType())) {
+      Ty = Ref->getSourceElementType();
+      for (Use &U : drop_begin(Ref->indices()))
+        Ty = GetElementPtrInst::getTypeAtIndex(Ty, U.get());
+    } else {
+      Ty = Ref->getResultElementType();
+    }
+    /*
+    if (Type *PtrElemTy = GR->findDeducedElementType(Ref->getPointerOperand()))
+    { Ty = PtrElemTy; for (Use &U : drop_begin(Ref->indices())) Ty =
+    GetElementPtrInst::getTypeAtIndex(Ty, U.get()); if
+    (isTodoType(Ref->getPointerOperand())) insertTodoType(Ref); } else if
+    (isNestedPointer(Ref->getSourceElementType())) { Ty =
+    Ref->getSourceElementType(); for (Use &U : drop_begin(Ref->indices())) Ty =
+    GetElementPtrInst::getTypeAtIndex(Ty, U.get()); } else { Ty =
+    Ref->getResultElementType();
+    }
+    */
+    /*
     if (isNestedPointer(Ref->getSourceElementType())) {
       Type *PtrElemTy = GR->findDeducedElementType(Ref->getPointerOperand());
       Ty = PtrElemTy ? PtrElemTy : Ref->getSourceElementType();
       for (Use &U : drop_begin(Ref->indices()))
         Ty = GetElementPtrInst::getTypeAtIndex(Ty, U.get());
     }
+    */
   } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
     Value *Op = Ref->getPointerOperand();
     Type *KnownTy = GR->findDeducedElementType(Op);
@@ -934,7 +998,6 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     Uncomplete = isTodoType(I);
     Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
-    // TODO: ensure that getPointerOperand() and GEP result type are consistent
     if (GR->findDeducedElementType(Ref->getPointerOperand()))
       return;
     KnownElemTy = Ref->getSourceElementType();
@@ -992,6 +1055,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
         GR->addDeducedElementType(CurrF, OpElemTy);
         GR->addReturnType(CurrF, TypedPointerType::get(
                                      OpElemTy, getPointerAddressSpace(RetTy)));
+        DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+            std::make_pair(I, Op)};
         for (User *U : CurrF->users()) {
           CallInst *CI = dyn_cast<CallInst>(U);
           if (!CI || CI->getCalledFunction() != CurrF)
@@ -999,10 +1064,11 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
           if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(CI)) {
             if (Type *PrevElemTy = GR->findDeducedElementType(CI)) {
               updateAssignType(AssignCI, CI, PoisonValue::get(OpElemTy));
-              propagateElemType(CI, PrevElemTy);
+              propagateElemType(CI, PrevElemTy, VisitedSubst);
             }
           }
         }
+        TypeValidated.insert(I);
       }
       return;
     }
@@ -1075,7 +1141,9 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
       } else {
         Type *PrevElemTy = GR->findDeducedElementType(Op);
         updateAssignType(AssignCI, Op, OpTyVal);
-        propagateElemTypeRec(Op, KnownElemTy, PrevElemTy);
+        DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+            std::make_pair(I, Op)};
+        propagateElemTypeRec(Op, KnownElemTy, PrevElemTy, VisitedSubst);
       }
     } else {
       eraseTodoType(Op);
@@ -1087,6 +1155,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
         I->setOperand(OpIt.second, PtrCastI);
     }
   }
+  TypeValidated.insert(I);
 }
 
 void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
@@ -1293,10 +1362,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
   Type *VTy = V->getType();
 
   // A couple of sanity checks.
-  assert((isPointerTy(VTy) ||
-          (isa<TargetExtType>(VTy) &&
-           isTypedPointerWrapper(dyn_cast<TargetExtType>(VTy)))) &&
-         "Expect a pointer type!");
+  assert((isPointerTy(VTy)) && "Expect a pointer type!");
   if (Type *ElemTy = getPointeeType(VTy))
     if (ElemTy != AssignedType)
       report_fatal_error("Unexpected pointer element type!");
@@ -1329,6 +1395,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
 void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
     Instruction *I, Value *Pointer, Type *ExpectedElementType,
     unsigned OperandToReplace, IRBuilder<> &B) {
+  TypeValidated.insert(I);
   // If Pointer is the result of nop BitCastInst (ptr -> ptr), use the source
   // pointer instead. The BitCastInst should be later removed when visited.
   while (BitCastInst *BC = dyn_cast<BitCastInst>(Pointer))
@@ -1392,8 +1459,11 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
         //  uncomplete, update spv_assign_ptr_type arguments.
         if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer)) {
           Type *PrevElemTy = GR->findDeducedElementType(Pointer);
+          assert(PrevElemTy);
+          DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+              std::make_pair(I, Pointer)};
           updateAssignType(AssignCI, Pointer, ExpectedElementVal);
-          propagateElemTypeRec(Pointer, ExpectedElementType, PrevElemTy);
+          propagateElemType(Pointer, PrevElemTy, VisitedSubst);
         } else {
           buildAssignPtr(B, ExpectedElementType, Pointer);
         }
@@ -1422,15 +1492,20 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
   }
   if (SI) {
     Value *Op = SI->getValueOperand();
+    Value *Pointer = SI->getPointerOperand();
+    // if (!GR->findDeducedElementType(Pointer) || isTodoType(Pointer)) {
     Type *OpTy = Op->getType();
     if (auto *OpI = dyn_cast<Instruction>(Op))
       OpTy = restoreMutatedType(GR, OpI, OpTy);
     if (OpTy == Op->getType())
       OpTy = deduceElementTypeByValueDeep(OpTy, Op, false);
-    return replacePointerOperandWithPtrCast(I, SI->getPointerOperand(), OpTy, 1,
-                                            B);
-  } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    replacePointerOperandWithPtrCast(I, Pointer, OpTy, 1, B);
+    //}
+    return;
+  }
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     Value *Pointer = LI->getPointerOperand();
+    // if (!GR->findDeducedElementType(Pointer) || isTodoType(Pointer)) {
     Type *OpTy = LI->getType();
     if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
       // TODO: isNestedPointer() instead of dyn_cast<PointerType>
@@ -1443,8 +1518,11 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
           insertTodoType(Pointer);
       }
     }
-    return replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
-  } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+    replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
+    //}
+    return;
+  }
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
     Value *Pointer = GEPI->getPointerOperand();
     Type *OpTy = GEPI->getSourceElementType();
     replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
@@ -1522,7 +1600,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
     if (!ExpectedType || ExpectedType->isVoidTy())
       continue;
 
-    if (ExpectedType->isTargetExtTy())
+    if (ExpectedType->isTargetExtTy() &&
+        !isTypedPointerWrapper(cast<TargetExtType>(ExpectedType)))
       insertAssignPtrTypeTargetExt(cast<TargetExtType>(ExpectedType),
                                    ArgOperand, B);
     else
@@ -2155,6 +2234,7 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
   unsigned SzTodo = TodoTypeSz;
   DenseMap<Value *, SmallPtrSet<Value *, 4>> ToProcess;
   for (auto [Op, Enabled] : TodoType) {
+    // TODO: also skip (continue) when isa<CallInst>(Op)
     if (!Enabled || isa<GetElementPtrInst>(Op))
       continue;
     CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op);
@@ -2168,11 +2248,12 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
       std::unordered_set<Value *> Visited;
       if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) {
         if (ElemTy != KnownTy) {
+          DenseSet<std::pair<Value *, Value *>> VisitedSubst;
           if (isa<CallInst>(Op)) {
-            propagateElemType(CI, ElemTy);
+            propagateElemType(CI, ElemTy, VisitedSubst);
           } else {
             updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
-            propagateElemTypeRec(CI, ElemTy, KnownTy);
+            propagateElemTypeRec(CI, ElemTy, KnownTy, VisitedSubst);
           }
           eraseTodoType(Op);
           continue;
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index d5b81bf46c804e..f67b28f1edd1c8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -394,11 +394,11 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const {
       case SPIRV::OpGenericCastToPtr:
         validateAccessChain(STI, MRI, GR, MI);
         break;
-      case SPIRV::OpPtrAccessChain:
-      case SPIRV::OpInBoundsPtrAccessChain:
-        if (MI.getNumOperands() == 4)
-          validateAccessChain(STI, MRI, GR, MI);
-        break;
+//      case SPIRV::OpPtrAccessChain:
+//      case SPIRV::OpInBoundsPtrAccessChain:
+//        if (MI.getNumOperands() == 4)
+//          validateAccessChain(STI, MRI, GR, MI);
+//        break;
 
       case SPIRV::OpFunctionCall:
         // ensure there is no mismatch between actual and expected arg types:
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 368572e311269c..17685162ce2bfb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -282,6 +282,13 @@ inline bool isTypedPointerWrapper(const TargetExtType *ExtTy) {
          ExtTy->getNumTypeParameters() == 1;
 }
 
+// True if this is an instance of PointerType or TypedPointerType.
+inline bool isPointerTyOrWrapper(const Type *Ty) {
+  if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
+    return isTypedPointerWrapper(ExtTy);
+  return isPointerTy(Ty);
+}
+
 inline Type *applyWrappers(Type *Ty) {
   if (auto *ExtTy = dyn_cast<TargetExtType>(Ty)) {
     if (isTypedPointerWrapper(ExtTy))

>From abcbe92690108620d58f8f6c83d799cc50bb748d Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 27 Nov 2024 11:08:24 -0800
Subject: [PATCH 14/17] fix types propagation after update

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp |   5 +-
 .../fp_two_calls.ll                           |   8 +-
 .../cl_intel_sub_groups.ll                    |   3 +-
 .../SPIRV/validate/sycl-hier-par-basic.ll     | 974 ++++++++++++++++++
 4 files changed, 983 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index f828e5e81aa248..20bd9fe4fa72ce 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -1123,6 +1123,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
     Value *OpTyVal = PoisonValue::get(KnownElemTy);
     Type *OpTy = Op->getType();
     if (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)) {
+      Type *PrevElemTy = GR->findDeducedElementType(Op);
       GR->addDeducedElementType(Op, KnownElemTy);
       // check if KnownElemTy is complete
       if (!Uncomplete)
@@ -1139,7 +1140,6 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
                             {B.getInt32(getPointerAddressSpace(OpTy))}, B);
         GR->addAssignPtrTypeInstr(Op, CI);
       } else {
-        Type *PrevElemTy = GR->findDeducedElementType(Op);
         updateAssignType(AssignCI, Op, OpTyVal);
         DenseSet<std::pair<Value *, Value *>> VisitedSubst{
             std::make_pair(I, Op)};
@@ -2249,12 +2249,15 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
       if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) {
         if (ElemTy != KnownTy) {
           DenseSet<std::pair<Value *, Value *>> VisitedSubst;
+          propagateElemType(CI, ElemTy, VisitedSubst);
+          /*
           if (isa<CallInst>(Op)) {
             propagateElemType(CI, ElemTy, VisitedSubst);
           } else {
             updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
             propagateElemTypeRec(CI, ElemTy, KnownTy, VisitedSubst);
           }
+          */
           eraseTodoType(Op);
           continue;
         }
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
index 1b217c3bb92f16..9fa46f50a2e89b 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll
@@ -17,12 +17,10 @@
 ; CHECK-DAG: %[[TyPtrInt8:.*]] = OpTypePointer Function %[[TyInt8]]
 ; CHECK-DAG: %[[TyUncompleteFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrInt8]]
 ; CHECK-DAG: %[[TyPtrUncompleteFp:.*]] = OpTypePointer Function %[[TyUncompleteFp]]
-; CHECK-DAG: %[[TyUncompleteBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrUncompleteFp]] %[[TyPtrInt8]]
-; CHECK-DAG: %[[TyPtrUncompleteBar:.*]] = OpTypePointer Function %[[TyUncompleteBar]]
-; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrUncompleteBar]]
-; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]]
-; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrFp]] %[[TyPtrInt8]]
+; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrUncompleteFp]] %[[TyPtrInt8]]
 ; CHECK-DAG: %[[TyPtrBar:.*]] = OpTypePointer Function %[[TyBar]]
+; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrBar]]
+; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]]
 ; CHECK-DAG: %[[TyTest:.*]] = OpTypeFunction %[[TyVoid]] %[[TyPtrFp]] %[[TyPtrInt8]] %[[TyPtrBar]]
 ; CHECK: %[[test]] = OpFunction %[[TyVoid]] None %[[TyTest]]
 ; CHECK: %[[fp]] = OpFunctionParameter %[[TyPtrFp]]
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
index 9374e154a0239f..13667f44389e7b 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll
@@ -37,7 +37,8 @@
 
 ; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
 
-; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - -filetype=obj | spirv-val %}
 
 ; CHECK-ERROR: LLVM ERROR: intel_sub_group_shuffle: the builtin requires the following SPIR-V extension: SPV_INTEL_subgroups
 
diff --git a/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll b/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll
new file mode 100644
index 00000000000000..77ed1d6fecf9ae
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll
@@ -0,0 +1,974 @@
+; This is an excerpt from the SYCL end-to-end test suite, stripped of irrelevant details,
+; that reproduces multiple cases of the issue where OpPhi's result type mismatches its operand types.
+; The only pass criterion is that spirv-val considers the output valid.
+
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+%struct.PFWGFunctor = type { i64, i64, i32, i32, %"class.sycl::_V1::accessor" }
+%"class.sycl::_V1::accessor" = type { %"class.sycl::_V1::detail::AccessorImplDevice", %union.anon }
+%"class.sycl::_V1::detail::AccessorImplDevice" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range" }
+%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" }
+%"class.sycl::_V1::detail::array" = type { [1 x i64] }
+%union.anon = type { ptr addrspace(1) }
+%class.anon.2 = type { %"class.sycl::_V1::accessor" }
+%"class.sycl::_V1::group" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range" }
+%"class.sycl::_V1::group.15" = type { %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16" }
+%"class.sycl::_V1::range.16" = type { %"class.sycl::_V1::detail::array.17" }
+%"class.sycl::_V1::detail::array.17" = type { [2 x i64] }
+%"class.sycl::_V1::private_memory" = type { %struct.MyStruct }
+%struct.MyStruct = type { i32, i32 }
+
+@GFunctor = internal addrspace(3) global %struct.PFWGFunctor undef, align 8
+@WI.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WI.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WI.2 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WI.3 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WI.4 = internal unnamed_addr addrspace(3) global i32 undef, align 8
+@WI.6 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCnt = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@__spirv_BuiltInNumWorkgroups = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@GKernel1 = internal addrspace(3) global %class.anon.2 undef, align 8
+@GCnt2 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@GKernel2 = internal addrspace(3) global %class.anon.2 undef, align 8
+@GCnt3 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@GKernel3 = internal addrspace(3) global %class.anon.2 undef, align 8
+@GCnt4 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@GKernel4 = internal addrspace(3) global %class.anon.2 undef, align 8
+@GCnt5 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+@__spirv_BuiltInLocalInvocationIndex = external local_unnamed_addr addrspace(1) constant i64, align 8
+@GThis = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GAsCast = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.1.0 = internal unnamed_addr addrspace(3) global i64 undef, align 16
+@WGCopy.1.1 = internal unnamed_addr addrspace(3) global i64 undef, align 16
+@WGCopy.1.2 = internal unnamed_addr addrspace(3) global i64 undef, align 16
+@WGCopy.1.3 = internal unnamed_addr addrspace(3) global i64 undef, align 16
+@WGCopy.1.4 = internal unnamed_addr addrspace(3) global i32 undef, align 16
+@WGCopy.1.5 = internal unnamed_addr addrspace(3) global i32 undef, align 16
+@WGCopy.1.6 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 16
+@ArgShadow = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16
+@GAsCast2 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp2 = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy.3.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.4.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.5.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.6.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@ArgShadow.7 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16
+@GAscast3 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp3 = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy.9.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.10.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@ArgShadow.11 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16
+@GAsCast4 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp4 = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy.13.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.13.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.14.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.14.1 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.15.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.15.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.16.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.16.1 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@ArgShadow.17 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group.15" undef, align 16
+@GAsCast5 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@GCmp5 = internal unnamed_addr addrspace(3) global i1 undef, align 1
+@WGCopy.19.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8
+@WGCopy.20.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@WGCopy.20.1 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8
+@ArgShadow.21 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16
+@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@__spirv_BuiltInGlobalSize = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@__spirv_BuiltInWorkgroupId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+@__spirv_BuiltInWorkgroupSize = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @_ZTS11PFWGFunctor(i64 noundef %_arg_wg_chunk, i64 noundef %_arg_range_length, i32 noundef %_arg_n_iter, i32 noundef %_arg_addend, ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) {
+entry:
+  %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8
+  store i64 %_arg_wg_chunk, ptr addrspace(3) @GFunctor, align 8
+  store i64 %_arg_range_length, ptr addrspace(3) undef, align 8
+  store i32 %_arg_n_iter, ptr addrspace(3) undef, align 8
+  store i32 %_arg_addend, ptr addrspace(3) undef, align 4
+  %0 = load i64, ptr %_arg_dev_ptr1, align 8
+  %1 = load i64, ptr %_arg_dev_ptr2, align 8
+  %2 = load i64, ptr %_arg_dev_ptr3, align 8
+  store i64 %2, ptr addrspace(3) undef, align 8
+  store i64 %0, ptr addrspace(3) undef, align 8
+  store i64 %1, ptr addrspace(3) undef, align 8
+  %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+  store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+  %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+  %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+  %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+  %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67)
+  store i64 %3, ptr %agg.tmp67, align 1
+  %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+  store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+  store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+  store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+  %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %cmpz15.i = icmp eq i64 %7, 0
+  br i1 %cmpz15.i, label %leader.i, label %merge.i
+
+leader.i:                                         ; preds = %entry
+  call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false)
+  br label %merge.i
+
+merge.i:                                          ; preds = %leader.i, %entry
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow, i64 32, i1 false)
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i:                                      ; preds = %merge.i
+  %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+  store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast, align 8
+  store ptr addrspace(4) addrspacecast (ptr addrspace(3) @GFunctor to ptr addrspace(4)), ptr addrspace(3) @GThis, align 8
+  %8 = load i32, ptr addrspace(3) undef, align 4
+  %9 = load i64, ptr addrspace(3) @GFunctor, align 8
+  %index.i = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+  %10 = load i64, ptr %index.i, align 8
+  %mul.i = mul i64 %9, %10
+  %localRange.i = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+  %11 = load i64, ptr %localRange.i, align 8
+  %12 = load i64, ptr addrspace(3) undef, align 8
+  store i64 %9, ptr addrspace(3) @WI.0, align 8
+  store i64 %11, ptr addrspace(3) @WI.1, align 8
+  store i64 %mul.i, ptr addrspace(3) @WI.2, align 8
+  store i64 %12, ptr addrspace(3) @WI.3, align 8
+  store i32 %8, ptr addrspace(3) @WI.4, align 8
+  store ptr addrspace(4) undef, ptr addrspace(3) @WI.6, align 8
+  store i32 0, ptr addrspace(3) @GCnt, align 4
+  br label %wg_cf.i
+
+wg_cf.i:                                          ; preds = %wg_leader.i, %merge.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_this1.i = load ptr addrspace(4), ptr addrspace(3) @GThis, align 8
+  %n_iter.i = getelementptr inbounds i8, ptr addrspace(4) %wg_val_this1.i, i64 16
+  %13 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %wg_cf11.i, %wg_cf.i
+  %agg.tmp.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.0.0.copyload13, %wg_cf11.i ]
+  %agg.tmp.i.sroa.6.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.6.0.copyload15, %wg_cf11.i ]
+  %agg.tmp.i.sroa.7.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.7.0.copyload17, %wg_cf11.i ]
+  %agg.tmp.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.8.0.copyload19, %wg_cf11.i ]
+  %agg.tmp.i.sroa.9.0 = phi i32 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.9.0.copyload21, %wg_cf11.i ]
+  %agg.tmp.i.sroa.10.0 = phi i32 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.10.0.copyload23, %wg_cf11.i ]
+  %agg.tmp.i.sroa.11.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.11.0.copyload25, %wg_cf11.i ]
+  %this.addr.0.i = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GFunctor to ptr addrspace(4)), %wg_cf.i ], [ %mat_ld13.i, %wg_cf11.i ]
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %wg_leader4.i, label %wg_cf5.i
+
+wg_leader4.i:                                     ; preds = %for.cond.i
+  %14 = load i32, ptr addrspace(3) @GCnt, align 4
+  %15 = load i32, ptr addrspace(4) %n_iter.i, align 8
+  %cmp.i = icmp slt i32 %14, %15
+  store i1 %cmp.i, ptr addrspace(3) @GCmp, align 1
+  br label %wg_cf5.i
+
+wg_cf5.i:                                         ; preds = %wg_leader4.i, %for.cond.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp, align 1
+  br i1 %wg_val_cmp.i, label %for.body.i, label %_ZNK11PFWGFunctorclEN4sycl3_V15groupILi1EEE.exit
+
+for.body.i:                                       ; preds = %wg_cf5.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %wg_leader7.i, label %wg_cf8.i
+
+wg_leader7.i:                                     ; preds = %for.body.i
+  %agg.tmp.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WI.0, align 8
+  %agg.tmp.i.sroa.6.0.copyload = load i64, ptr addrspace(3) @WI.1, align 8
+  %agg.tmp.i.sroa.7.0.copyload = load i64, ptr addrspace(3) @WI.2, align 8
+  %agg.tmp.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WI.3, align 8
+  %agg.tmp.i.sroa.9.0.copyload = load i32, ptr addrspace(3) @WI.4, align 8
+  %agg.tmp.i.sroa.11.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WI.6, align 8
+  br label %wg_cf8.i
+
+wg_cf8.i:                                         ; preds = %wg_leader7.i, %for.body.i
+  %agg.tmp.i.sroa.0.1 = phi i64 [ %agg.tmp.i.sroa.0.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.0.0, %for.body.i ]
+  %agg.tmp.i.sroa.6.1 = phi i64 [ %agg.tmp.i.sroa.6.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.6.0, %for.body.i ]
+  %agg.tmp.i.sroa.7.1 = phi i64 [ %agg.tmp.i.sroa.7.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.7.0, %for.body.i ]
+  %agg.tmp.i.sroa.8.1 = phi i64 [ %agg.tmp.i.sroa.8.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.8.0, %for.body.i ]
+  %agg.tmp.i.sroa.9.1 = phi i32 [ %agg.tmp.i.sroa.9.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.9.0, %for.body.i ]
+  %agg.tmp.i.sroa.11.1 = phi ptr addrspace(4) [ %agg.tmp.i.sroa.11.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.11.0, %for.body.i ]
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i:                                        ; preds = %wg_cf8.i
+  store i64 %agg.tmp.i.sroa.0.1, ptr addrspace(3) @WGCopy.1.0, align 16
+  store i64 %agg.tmp.i.sroa.6.1, ptr addrspace(3) @WGCopy.1.1, align 16
+  store i64 %agg.tmp.i.sroa.7.1, ptr addrspace(3) @WGCopy.1.2, align 16
+  store i64 %agg.tmp.i.sroa.8.1, ptr addrspace(3) @WGCopy.1.3, align 16
+  store i32 %agg.tmp.i.sroa.9.1, ptr addrspace(3) @WGCopy.1.4, align 16
+  store i32 %agg.tmp.i.sroa.10.0, ptr addrspace(3) @WGCopy.1.5, align 16
+  store ptr addrspace(4) %agg.tmp.i.sroa.11.1, ptr addrspace(3) @WGCopy.1.6, align 16
+  store ptr addrspace(4) %this.addr.0.i, ptr addrspace(3) @WGCopy, align 8
+  br label %LeaderMat.i
+
+LeaderMat.i:                                      ; preds = %TestMat.i, %wg_cf8.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %mat_ld13.i = load ptr addrspace(4), ptr addrspace(3) @WGCopy, align 8
+  %agg.tmp.i.sroa.0.0.copyload13 = load i64, ptr addrspace(3) @WGCopy.1.0, align 16
+  %agg.tmp.i.sroa.6.0.copyload15 = load i64, ptr addrspace(3) @WGCopy.1.1, align 16
+  %agg.tmp.i.sroa.7.0.copyload17 = load i64, ptr addrspace(3) @WGCopy.1.2, align 16
+  %agg.tmp.i.sroa.8.0.copyload19 = load i64, ptr addrspace(3) @WGCopy.1.3, align 16
+  %agg.tmp.i.sroa.9.0.copyload21 = load i32, ptr addrspace(3) @WGCopy.1.4, align 16
+  %agg.tmp.i.sroa.10.0.copyload23 = load i32, ptr addrspace(3) @WGCopy.1.5, align 16
+  %agg.tmp.i.sroa.11.0.copyload25 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.1.6, align 16
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  %cmp.not.i.i = icmp ult i64 %13, %agg.tmp.i.sroa.0.0.copyload13
+  br i1 %cmp.not.i.i, label %if.end.i.i, label %lexit1
+
+if.end.i.i:                                       ; preds = %LeaderMat.i
+  %add.i.i = add i64 %agg.tmp.i.sroa.0.0.copyload13, %agg.tmp.i.sroa.6.0.copyload15
+  %sub.i.i = add i64 %add.i.i, -1
+  %div.i.i = udiv i64 %sub.i.i, %agg.tmp.i.sroa.6.0.copyload15
+  %mul.i.i = mul i64 %13, %div.i.i
+  %add4.i.i = add i64 %agg.tmp.i.sroa.7.0.copyload17, %mul.i.i
+  %add6.i.i = add i64 %add4.i.i, %div.i.i
+  %.sroa.speculated.i.i = call i64 @llvm.umin.i64(i64 %agg.tmp.i.sroa.8.0.copyload19, i64 %add6.i.i)
+  %16 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp.i.sroa.11.0.copyload25, i64 24
+  br label %for.cond.i.i
+
+for.cond.i.i:                                     ; preds = %for.body.i.i, %if.end.i.i
+  %ind.0.i.i = phi i64 [ %add4.i.i, %if.end.i.i ], [ %inc.i.i, %for.body.i.i ]
+  %cmp8.i.i = icmp ult i64 %ind.0.i.i, %.sroa.speculated.i.i
+  br i1 %cmp8.i.i, label %for.body.i.i, label %lexit1
+
+for.body.i.i:                                     ; preds = %for.cond.i.i
+  %17 = load ptr addrspace(1), ptr addrspace(4) %16, align 8
+  %arrayidx.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %17, i64 %ind.0.i.i
+  %18 = load i32, ptr addrspace(1) %arrayidx.i.i.i, align 4
+  %add10.i.i = add nsw i32 %18, %agg.tmp.i.sroa.9.0.copyload21
+  store i32 %add10.i.i, ptr addrspace(1) %arrayidx.i.i.i, align 4
+  %inc.i.i = add nuw i64 %ind.0.i.i, 1
+  br label %for.cond.i.i
+
+lexit1: ; preds = %for.cond.i.i, %LeaderMat.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %wg_leader10.i, label %wg_cf11.i
+
+wg_leader10.i:                                    ; preds = %lexit1
+  %19 = load i32, ptr addrspace(3) @GCnt, align 4
+  %inc.i = add nsw i32 %19, 1
+  store i32 %inc.i, ptr addrspace(3) @GCnt, align 4
+  br label %wg_cf11.i
+
+wg_cf11.i:                                        ; preds = %wg_leader10.i, %lexit1
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br label %for.cond.i
+
+_ZNK11PFWGFunctorclEN4sycl3_V15groupILi1EEE.exit: ; preds = %wg_cf5.i
+  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67)
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) ; paired with llvm.lifetime.end.p0 declared below
+
+; Function Attrs: convergent nounwind
+declare dso_local spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef, i32 noundef, i32 noundef) ; Itanium-mangled __spirv_ControlBarrier(unsigned, unsigned, unsigned); NOTE(review): the (2, 2, 272) operands used by callers presumably mean Workgroup scope and SequentiallyConsistent|WorkgroupMemory semantics - confirm against the SPIR-V spec
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) ; memcpy variant: addrspace(3) destination, default-addrspace source
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p3.i64(ptr noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) ; memcpy variant: default-addrspace destination, addrspace(3) source
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.umin.i64(i64, i64) ; unsigned minimum of two i64 values
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) ; counterpart of llvm.lifetime.start.p0 declared above
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @bar(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { ; kernel @bar: an outer loop counted through @GCnt2 runs two inner loops per iteration; branches are gated on %cmpz27.i and interleaved with __spirv_ControlBarrier calls
+entry:
+  %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8
+  %0 = load i64, ptr %_arg_dev_ptr1, align 8
+  %1 = load i64, ptr %_arg_dev_ptr2, align 8
+  %2 = load i64, ptr %_arg_dev_ptr3, align 8
+  store i64 %2, ptr addrspace(3) @GKernel1, align 8
+  store i64 %0, ptr addrspace(3) undef, align 8 ; destination pointer is undef (also in the next store and the one at %add.ptr.i); presumably an artifact of test reduction
+  store i64 %1, ptr addrspace(3) undef, align 8
+  %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+  store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+  %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+  %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32 ; %4 is also the step of both inner loops below
+  %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+  %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67)
+  store i64 %3, ptr %agg.tmp67, align 1 ; %3..%6 are written into %agg.tmp67 at byte offsets 0, 8, 16 and 24
+  %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+  store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+  store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+  store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+  %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %cmpz27.i = icmp eq i64 %7, 0 ; true when the value loaded from @__spirv_BuiltInLocalInvocationIndex is 0; selects the leader/TestMat blocks below
+  br i1 %cmpz27.i, label %leader.i, label %merge.i
+
+leader.i:                                         ; preds = %entry
+  call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.7, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false) ; copy the 32 bytes of %agg.tmp67 into @ArgShadow.7
+  br label %merge.i
+
+merge.i:                                          ; preds = %leader.i, %entry
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.7, i64 32, i1 false) ; both incoming paths copy @ArgShadow.7 back into %agg.tmp67
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz27.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i:                                      ; preds = %merge.i
+  %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+  store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast2, align 8
+  store i32 0, ptr addrspace(3) @GCnt2, align 4 ; reset the outer-loop counter
+  br label %wg_cf.i
+
+wg_cf.i:                                          ; preds = %wg_leader.i, %merge.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %8 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 ; %8 indexes the i32 element updated in both inner loop bodies
+  %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 ; %9 is the start value of both inner loops
+  %cmp.i.i.i.i.i.i = icmp ult i64 %8, 2147483648 ; operand of the llvm.assume calls in both inner loop bodies
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %wg_cf18.i, %wg_cf.i
+  %agg.tmp5.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %18, %wg_cf18.i ] ; loop-carried: %18 is loaded from @WGCopy.6.0 in LeaderMat.i
+  %agg.tmp4.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %17, %wg_cf18.i ] ; loop-carried: %17 is loaded from @WGCopy.5.0 in LeaderMat.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz27.i, label %wg_leader8.i, label %wg_cf9.i
+
+wg_leader8.i:                                     ; preds = %for.cond.i
+  %10 = load i32, ptr addrspace(3) @GCnt2, align 4
+  %cmp.i = icmp slt i32 %10, 2 ; outer loop condition: @GCnt2 < 2, published through @GCmp2
+  store i1 %cmp.i, ptr addrspace(3) @GCmp2, align 1
+  br label %wg_cf9.i
+
+wg_cf9.i:                                         ; preds = %wg_leader8.i, %for.cond.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp2, align 1 ; the condition is re-read from @GCmp2 on every path
+  br i1 %wg_val_cmp.i, label %for.body.i, label %lexit2
+
+for.body.i:                                       ; preds = %wg_cf9.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz27.i, label %TestMat25.i, label %LeaderMat22.i
+
+TestMat25.i:                                      ; preds = %for.body.i
+  store i64 %agg.tmp5.i.sroa.0.0, ptr addrspace(3) @WGCopy.6.0, align 8
+  store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel1 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.4.0, align 8 ; constant expression: @GKernel1 cast to addrspace(4), then to i64
+  store i64 5, ptr addrspace(3) @WGCopy.3.0, align 8 ; becomes the bound %11 of the first inner loop
+  store i64 %agg.tmp4.i.sroa.0.0, ptr addrspace(3) @WGCopy.5.0, align 8
+  br label %LeaderMat22.i
+
+LeaderMat22.i:                                    ; preds = %TestMat25.i, %for.body.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %11 = load i64, ptr addrspace(3) @WGCopy.3.0, align 8
+  %12 = load i64, ptr addrspace(3) @WGCopy.4.0, align 8
+  %13 = inttoptr i64 %12 to ptr addrspace(4) ; turn the integer read from @WGCopy.4.0 back into a pointer
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  %14 = getelementptr inbounds i8, ptr addrspace(4) %13, i64 24 ; byte offset 24 from %13; an addrspace(1) pointer is loaded from here in the loop body
+  br label %for.cond.i.i
+
+for.cond.i.i:                                     ; preds = %for.body.i.i, %LeaderMat22.i
+  %storemerge.i.i = phi i64 [ %9, %LeaderMat22.i ], [ %add.i.i, %for.body.i.i ] ; first inner loop: starts at %9, advances by %4
+  %cmp.i.i = icmp ult i64 %storemerge.i.i, %11
+  br i1 %cmp.i.i, label %for.body.i.i, label %lexit3
+
+for.body.i.i:                                     ; preds = %for.cond.i.i
+  call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+  %15 = load ptr addrspace(1), ptr addrspace(4) %14, align 8
+  %arrayidx.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %15, i64 %8
+  %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+  %inc.i.i.i.i = add nsw i32 %16, 1 ; increment the i32 at index %8 by one
+  store i32 %inc.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+  %add.i.i = add i64 %storemerge.i.i, %4
+  br label %for.cond.i.i
+
+lexit3: ; preds = %for.cond.i.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz27.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i:                                        ; preds = %lexit3
+  store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel1 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.6.0, align 8
+  store i64 %12, ptr addrspace(3) @WGCopy.4.0, align 8
+  store i64 %11, ptr addrspace(3) @WGCopy.3.0, align 8
+  store i64 2, ptr addrspace(3) @WGCopy.5.0, align 8 ; becomes the bound %17 of the second inner loop
+  br label %LeaderMat.i
+
+LeaderMat.i:                                      ; preds = %TestMat.i, %lexit3
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %17 = load i64, ptr addrspace(3) @WGCopy.5.0, align 8
+  %18 = load i64, ptr addrspace(3) @WGCopy.6.0, align 8
+  %19 = inttoptr i64 %18 to ptr addrspace(4) ; turn the integer read from @WGCopy.6.0 back into a pointer
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  %20 = getelementptr inbounds i8, ptr addrspace(4) %19, i64 24
+  br label %for.cond.i.i19
+
+for.cond.i.i19:                                   ; preds = %for.body.i.i22, %LeaderMat.i
+  %storemerge.i.i20 = phi i64 [ %9, %LeaderMat.i ], [ %add.i.i26, %for.body.i.i22 ] ; second inner loop: same shape as the first, bounded by %17
+  %cmp.i.i21 = icmp ult i64 %storemerge.i.i20, %17
+  br i1 %cmp.i.i21, label %for.body.i.i22, label %lexit4
+
+for.body.i.i22:                                   ; preds = %for.cond.i.i19
+  call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+  %21 = load ptr addrspace(1), ptr addrspace(4) %20, align 8
+  %arrayidx.i.i.i.i.i23 = getelementptr inbounds i32, ptr addrspace(1) %21, i64 %8
+  %22 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i23, align 4
+  %inc.i.i.i.i25 = add nsw i32 %22, 1
+  store i32 %inc.i.i.i.i25, ptr addrspace(1) %arrayidx.i.i.i.i.i23, align 4
+  %add.i.i26 = add i64 %storemerge.i.i20, %4
+  br label %for.cond.i.i19
+
+lexit4: ; preds = %for.cond.i.i19
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz27.i, label %wg_leader17.i, label %wg_cf18.i
+
+wg_leader17.i:                                    ; preds = %lexit4
+  %23 = load i32, ptr addrspace(3) @GCnt2, align 4
+  %inc.i = add nsw i32 %23, 1 ; advance the outer-loop counter @GCnt2
+  store i32 %inc.i, ptr addrspace(3) @GCnt2, align 4
+  br label %wg_cf18.i
+
+wg_cf18.i:                                        ; preds = %wg_leader17.i, %lexit4
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br label %for.cond.i
+
+lexit2: ; preds = %wg_cf9.i
+  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67)
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) ; called in the inner loop bodies of @bar and @test1 with an `icmp ult i64 ..., 2147483648` result
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @test1(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { ; kernel @test1: an outer loop counted through @GCnt3 runs one inner loop per iteration; branches are gated on %cmpz15.i and interleaved with __spirv_ControlBarrier calls
+entry:
+  %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8
+  %0 = load i64, ptr %_arg_dev_ptr1, align 8
+  %1 = load i64, ptr %_arg_dev_ptr2, align 8
+  %2 = load i64, ptr %_arg_dev_ptr3, align 8
+  store i64 %2, ptr addrspace(3) @GKernel2, align 8
+  store i64 %0, ptr addrspace(3) undef, align 8 ; destination pointer is undef (also in the next store and the one at %add.ptr.i); presumably an artifact of test reduction
+  store i64 %1, ptr addrspace(3) undef, align 8
+  %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+  store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+  %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+  %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32 ; %4 is also the step of the inner loop below
+  %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+  %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67)
+  store i64 %3, ptr %agg.tmp67, align 1 ; %3..%6 are written into %agg.tmp67 at byte offsets 0, 8, 16 and 24
+  %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+  store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+  store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+  store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+  %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %cmpz15.i = icmp eq i64 %7, 0 ; true when the value loaded from @__spirv_BuiltInLocalInvocationIndex is 0; selects the leader/TestMat blocks below
+  br i1 %cmpz15.i, label %leader.i, label %merge.i
+
+leader.i:                                         ; preds = %entry
+  call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.11, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false) ; copy the 32 bytes of %agg.tmp67 into @ArgShadow.11
+  br label %merge.i
+
+merge.i:                                          ; preds = %leader.i, %entry
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.11, i64 32, i1 false) ; both incoming paths copy @ArgShadow.11 back into %agg.tmp67
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i:                                      ; preds = %merge.i
+  %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+  store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAscast3, align 8
+  store i32 0, ptr addrspace(3) @GCnt3, align 4 ; reset the outer-loop counter
+  br label %wg_cf.i
+
+wg_cf.i:                                          ; preds = %wg_leader.i, %merge.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %8 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 ; %8 indexes the i32 element updated in the inner loop body
+  %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 ; %9 is the start value of the inner loop
+  %cmp.i.i.i.i.i.i = icmp ult i64 %8, 2147483648 ; operand of the llvm.assume call in the inner loop body
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %wg_cf11.i, %wg_cf.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %wg_leader4.i, label %wg_cf5.i
+
+wg_leader4.i:                                     ; preds = %for.cond.i
+  %10 = load i32, ptr addrspace(3) @GCnt3, align 4
+  %cmp.i = icmp slt i32 %10, 2 ; outer loop condition: @GCnt3 < 2, published through @GCmp3
+  store i1 %cmp.i, ptr addrspace(3) @GCmp3, align 1
+  br label %wg_cf5.i
+
+wg_cf5.i:                                         ; preds = %wg_leader4.i, %for.cond.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp3, align 1 ; the condition is re-read from @GCmp3 on every path
+  br i1 %wg_val_cmp.i, label %for.body.i, label %lexit6
+
+for.body.i:                                       ; preds = %wg_cf5.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i:                                        ; preds = %for.body.i
+  store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel2 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.10.0, align 8 ; constant expression: @GKernel2 cast to addrspace(4), then to i64
+  store i64 5, ptr addrspace(3) @WGCopy.9.0, align 8 ; becomes the bound %11 of the inner loop
+  br label %LeaderMat.i
+
+LeaderMat.i:                                      ; preds = %TestMat.i, %for.body.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %11 = load i64, ptr addrspace(3) @WGCopy.9.0, align 8
+  %12 = load i64, ptr addrspace(3) @WGCopy.10.0, align 8
+  %13 = inttoptr i64 %12 to ptr addrspace(4) ; turn the integer read from @WGCopy.10.0 back into a pointer
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  %14 = getelementptr inbounds i8, ptr addrspace(4) %13, i64 24 ; byte offset 24 from %13; an addrspace(1) pointer is loaded from here in the loop body
+  br label %for.cond.i.i
+
+for.cond.i.i:                                     ; preds = %for.body.i.i, %LeaderMat.i
+  %storemerge.i.i = phi i64 [ %9, %LeaderMat.i ], [ %add.i.i, %for.body.i.i ] ; inner loop: starts at %9, advances by %4, bounded by %11
+  %cmp.i.i = icmp ult i64 %storemerge.i.i, %11
+  br i1 %cmp.i.i, label %for.body.i.i, label %lexit7
+
+for.body.i.i:                                     ; preds = %for.cond.i.i
+  %cmp5.not.i.i.i.i.i.i = icmp ne i64 %storemerge.i.i, %9 ; false only while the induction value still equals its start value %9
+  %cond.i.i.i.i = zext i1 %cmp5.not.i.i.i.i.i.i to i32 ; 0 or 1: the amount added to the element below
+  call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+  %15 = load ptr addrspace(1), ptr addrspace(4) %14, align 8
+  %arrayidx.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %15, i64 %8
+  %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+  %add.i.i.i.i = add nsw i32 %16, %cond.i.i.i.i
+  store i32 %add.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+  %add.i.i = add i64 %storemerge.i.i, %4
+  br label %for.cond.i.i
+
+lexit7: ; preds = %for.cond.i.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz15.i, label %wg_leader10.i, label %wg_cf11.i
+
+wg_leader10.i:                                    ; preds = %lexit7
+  %17 = load i32, ptr addrspace(3) @GCnt3, align 4
+  %inc.i = add nsw i32 %17, 1 ; advance the outer-loop counter @GCnt3
+  store i32 %inc.i, ptr addrspace(3) @GCnt3, align 4
+  br label %wg_cf11.i
+
+wg_cf11.i:                                        ; preds = %wg_leader10.i, %lexit7
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br label %for.cond.i
+
+lexit6: ; preds = %wg_cf5.i
+  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67)
+  ret void
+}
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @test2(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) {
+entry:
+  %priv.i = alloca %"class.sycl::_V1::private_memory", align 4
+  %agg.tmp67 = alloca %"class.sycl::_V1::group.15", align 8
+  %0 = load i64, ptr %_arg_dev_ptr1, align 8
+  %1 = load i64, ptr %_arg_dev_ptr2, align 8
+  %2 = load i64, ptr %_arg_dev_ptr3, align 8
+  store i64 %2, ptr addrspace(3) @GKernel3, align 8
+  store i64 %0, ptr addrspace(3) undef, align 8
+  store i64 %1, ptr addrspace(3) undef, align 8
+  %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+  store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+  %3 = load i64, ptr addrspace(1) undef, align 8
+  %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+  %5 = load i64, ptr addrspace(1) undef, align 8
+  %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+  %7 = load i64, ptr addrspace(1) undef, align 8
+  %8 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+  %9 = load i64, ptr addrspace(1) undef, align 8
+  %10 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %agg.tmp67)
+  store i64 %3, ptr %agg.tmp67, align 1
+  %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+  store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+  store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+  store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.5.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 32
+  store i64 %7, ptr %agg.tmp6.sroa.5.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.6.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 40
+  store i64 %8, ptr %agg.tmp6.sroa.6.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.7.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 48
+  store i64 %9, ptr %agg.tmp6.sroa.7.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.8.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 56
+  store i64 %10, ptr %agg.tmp6.sroa.8.0.agg.tmp67.sroa_idx, align 1
+  %11 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %cmpz32.i = icmp eq i64 %11, 0
+  br i1 %cmpz32.i, label %leader.i, label %merge.i
+
+leader.i:                                         ; preds = %entry
+  call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(64) @ArgShadow.17, ptr noundef nonnull align 8 dereferenceable(64) %agg.tmp67, i64 64, i1 false)
+  br label %merge.i
+
+merge.i:                                          ; preds = %leader.i, %entry
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(64) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(64) @ArgShadow.17, i64 64, i1 false)
+  %priv.ascast.i = addrspacecast ptr %priv.i to ptr addrspace(4)
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz32.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i:                                      ; preds = %merge.i
+  %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+  store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast4, align 8
+  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %priv.i)
+  store i32 0, ptr addrspace(3) @GCnt4, align 4
+  br label %wg_cf.i
+
+wg_cf.i:                                          ; preds = %wg_leader.i, %merge.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %12 = load i64, ptr addrspace(1) undef, align 8
+  %13 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32
+  %14 = load i64, ptr addrspace(1) undef, align 8
+  %15 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32
+  %mul.i.i.i.i.i.i = mul i64 %12, %4
+  %add.i.i.i.i.i.i = add i64 %mul.i.i.i.i.i.i, %13
+  %cmp.i.i.i.i.i.i = icmp ult i64 %add.i.i.i.i.i.i, 2147483648
+  %conv.i.i.i.i.i = trunc i64 %add.i.i.i.i.i.i to i32
+  %y.i.i.i.i.i = getelementptr inbounds i8, ptr %priv.i, i64 4
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %wg_cf20.i, %wg_cf.i
+  %agg.tmp6.i.sroa.9.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp6.i.sroa.9.0.copyload40, %wg_cf20.i ]
+  %agg.tmp5.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp5.i.sroa.0.0.copyload44, %wg_cf20.i ]
+  %agg.tmp5.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp5.i.sroa.8.0.copyload48, %wg_cf20.i ]
+  %agg.tmp2.i.sroa.0.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp2.i.sroa.0.0.copyload52, %wg_cf20.i ]
+  %agg.tmp2.i.sroa.8.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp2.i.sroa.8.0.copyload56, %wg_cf20.i ]
+  %agg.tmp.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.0.0.copyload60, %wg_cf20.i ]
+  %agg.tmp.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.8.0.copyload64, %wg_cf20.i ]
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz32.i, label %wg_leader10.i, label %wg_cf11.i
+
+wg_leader10.i:                                    ; preds = %for.cond.i
+  %16 = load i32, ptr addrspace(3) @GCnt4, align 4
+  %cmp.i = icmp slt i32 %16, 2
+  store i1 %cmp.i, ptr addrspace(3) @GCmp4, align 1
+  br label %wg_cf11.i
+
+wg_cf11.i:                                        ; preds = %wg_leader10.i, %for.cond.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp4, align 1
+  br i1 %wg_val_cmp.i, label %for.body.i, label %for.end.i
+
+for.body.i:                                       ; preds = %wg_cf11.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz32.i, label %wg_leader13.i, label %wg_cf14.i
+
+wg_leader13.i:                                    ; preds = %for.body.i
+  br label %wg_cf14.i
+
+wg_cf14.i:                                        ; preds = %wg_leader13.i, %for.body.i
+  %agg.tmp2.i.sroa.0.1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GKernel3 to ptr addrspace(4)), %wg_leader13.i ], [ %agg.tmp2.i.sroa.0.0, %for.body.i ]
+  %agg.tmp2.i.sroa.8.1 = phi ptr addrspace(4) [ %priv.ascast.i, %wg_leader13.i ], [ %agg.tmp2.i.sroa.8.0, %for.body.i ]
+  %agg.tmp.i.sroa.0.1 = phi i64 [ 7, %wg_leader13.i ], [ %agg.tmp.i.sroa.0.0, %for.body.i ]
+  %agg.tmp.i.sroa.8.1 = phi i64 [ 3, %wg_leader13.i ], [ %agg.tmp.i.sroa.8.0, %for.body.i ]
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz32.i, label %TestMat30.i, label %LeaderMat27.i
+
+TestMat30.i:                                      ; preds = %wg_cf14.i
+  store i64 %agg.tmp.i.sroa.0.1, ptr addrspace(3) @WGCopy.13.0, align 8
+  store i64 %agg.tmp.i.sroa.8.1, ptr addrspace(3) @WGCopy.13.1, align 8
+  store ptr addrspace(4) %agg.tmp2.i.sroa.0.1, ptr addrspace(3) @WGCopy.14.0, align 8
+  store ptr addrspace(4) %agg.tmp2.i.sroa.8.1, ptr addrspace(3) @WGCopy.14.1, align 8
+  store i64 %agg.tmp5.i.sroa.0.0, ptr addrspace(3) @WGCopy.15.0, align 8
+  store i64 %agg.tmp5.i.sroa.8.0, ptr addrspace(3) @WGCopy.15.1, align 8
+  store ptr addrspace(4) %priv.ascast.i, ptr addrspace(3) @WGCopy.16.0, align 8
+  store ptr addrspace(4) %agg.tmp6.i.sroa.9.0, ptr addrspace(3) @WGCopy.16.1, align 8
+  br label %LeaderMat27.i
+
+LeaderMat27.i:                                    ; preds = %TestMat30.i, %wg_cf14.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %agg.tmp6.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.0, align 8
+  %agg.tmp6.i.sroa.9.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.1, align 8
+  %agg.tmp5.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WGCopy.15.0, align 8
+  %agg.tmp5.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WGCopy.15.1, align 8
+  %agg.tmp2.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.0, align 8
+  %agg.tmp.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WGCopy.13.0, align 8
+  %agg.tmp.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WGCopy.13.1, align 8
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  %17 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, i64 24
+  br label %for.cond.i.i
+
+for.cond.i.i:                                     ; preds = %lexit10, %LeaderMat27.i
+  %storemerge.i.i = phi i64 [ %14, %LeaderMat27.i ], [ %add.i.i, %lexit10 ]
+  %cmp.i.i = icmp ult i64 %storemerge.i.i, %agg.tmp.i.sroa.0.0.copyload
+  br i1 %cmp.i.i, label %for.cond.i.i.i, label %lexit11
+
+for.cond.i.i.i:                                   ; preds = %for.body.i.i.i, %for.cond.i.i
+  %storemerge.i.i.i = phi i64 [ %add.i.i.i, %for.body.i.i.i ], [ %15, %for.cond.i.i ]
+  %cmp.i.i.i = icmp ult i64 %storemerge.i.i.i, %agg.tmp.i.sroa.8.0.copyload
+  br i1 %cmp.i.i.i, label %for.body.i.i.i, label %lexit10
+
+for.body.i.i.i:                                   ; preds = %for.cond.i.i.i
+  call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+  %18 = load ptr addrspace(1), ptr addrspace(4) %17, align 8
+  %arrayidx.i.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %18, i64 %add.i.i.i.i.i.i
+  %19 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i.i, align 4
+  %inc.i.i.i.i.i = add nsw i32 %19, 1
+  store i32 %inc.i.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i.i, align 4
+  store i32 %conv.i.i.i.i.i, ptr %priv.i, align 4
+  store i32 5, ptr %y.i.i.i.i.i, align 4
+  %add.i.i.i = add i64 %storemerge.i.i.i, %6
+  br label %for.cond.i.i.i
+
+lexit10: ; preds = %for.cond.i.i.i
+  %add.i.i = add i64 %storemerge.i.i, %5
+  br label %for.cond.i.i
+
+lexit11: ; preds = %for.cond.i.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz32.i, label %wg_leader16.i, label %wg_cf17.i
+
+wg_leader16.i:                                    ; preds = %lexit11
+  br label %wg_cf17.i
+
+wg_cf17.i:                                        ; preds = %wg_leader16.i, %lexit11
+  %agg.tmp6.i.sroa.0.1 = phi ptr addrspace(4) [ %priv.ascast.i, %wg_leader16.i ], [ %agg.tmp6.i.sroa.0.0.copyload, %lexit11 ]
+  %agg.tmp6.i.sroa.9.1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GKernel3 to ptr addrspace(4)), %wg_leader16.i ], [ %agg.tmp6.i.sroa.9.0.copyload, %lexit11 ]
+  %agg.tmp5.i.sroa.0.1 = phi i64 [ 7, %wg_leader16.i ], [ %agg.tmp5.i.sroa.0.0.copyload, %lexit11 ]
+  %agg.tmp5.i.sroa.8.1 = phi i64 [ 3, %wg_leader16.i ], [ %agg.tmp5.i.sroa.8.0.copyload, %lexit11 ]
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz32.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i:                                        ; preds = %wg_cf17.i
+  store i64 %agg.tmp.i.sroa.0.0.copyload, ptr addrspace(3) @WGCopy.13.0, align 8
+  store i64 %agg.tmp.i.sroa.8.0.copyload, ptr addrspace(3) @WGCopy.13.1, align 8
+  store ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, ptr addrspace(3) @WGCopy.14.0, align 8
+  store ptr addrspace(4) %priv.ascast.i, ptr addrspace(3) @WGCopy.14.1, align 8
+  store i64 %agg.tmp5.i.sroa.0.1, ptr addrspace(3) @WGCopy.15.0, align 8
+  store i64 %agg.tmp5.i.sroa.8.1, ptr addrspace(3) @WGCopy.15.1, align 8
+  store ptr addrspace(4) %agg.tmp6.i.sroa.0.1, ptr addrspace(3) @WGCopy.16.0, align 8
+  store ptr addrspace(4) %agg.tmp6.i.sroa.9.1, ptr addrspace(3) @WGCopy.16.1, align 8
+  br label %LeaderMat.i
+
+LeaderMat.i:                                      ; preds = %TestMat.i, %wg_cf17.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %agg.tmp6.i.sroa.9.0.copyload40 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.1, align 8
+  %agg.tmp5.i.sroa.0.0.copyload44 = load i64, ptr addrspace(3) @WGCopy.15.0, align 8
+  %agg.tmp5.i.sroa.8.0.copyload48 = load i64, ptr addrspace(3) @WGCopy.15.1, align 8
+  %agg.tmp2.i.sroa.0.0.copyload52 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.0, align 8
+  %agg.tmp2.i.sroa.8.0.copyload56 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.1, align 8
+  %agg.tmp.i.sroa.0.0.copyload60 = load i64, ptr addrspace(3) @WGCopy.13.0, align 8
+  %agg.tmp.i.sroa.8.0.copyload64 = load i64, ptr addrspace(3) @WGCopy.13.1, align 8
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  %20 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp6.i.sroa.9.0.copyload40, i64 24
+  br label %for.cond.i.i25
+
+for.cond.i.i25:                                   ; preds = %lexit12, %LeaderMat.i
+  %storemerge.i.i26 = phi i64 [ %14, %LeaderMat.i ], [ %add.i.i31, %lexit12 ]
+  %cmp.i.i27 = icmp ult i64 %storemerge.i.i26, %agg.tmp5.i.sroa.0.0.copyload44
+  br i1 %cmp.i.i27, label %for.cond.i.i.i28, label %lexit13
+
+for.cond.i.i.i28:                                 ; preds = %for.body.i.i.i32, %for.cond.i.i25
+  %storemerge.i.i.i29 = phi i64 [ %add.i.i.i35, %for.body.i.i.i32 ], [ %15, %for.cond.i.i25 ]
+  %cmp.i.i.i30 = icmp ult i64 %storemerge.i.i.i29, %agg.tmp5.i.sroa.8.0.copyload48
+  br i1 %cmp.i.i.i30, label %for.body.i.i.i32, label %lexit12
+
+for.body.i.i.i32:                                 ; preds = %for.cond.i.i.i28
+  %21 = load i32, ptr %priv.i, align 4
+  %22 = load i32, ptr %y.i.i.i.i.i, align 4
+  %add.i.i.i.i.i = add nsw i32 %21, %22
+  call void @llvm.assume(i1 %cmp.i.i.i.i.i.i)
+  %23 = load ptr addrspace(1), ptr addrspace(4) %20, align 8
+  %arrayidx.i.i.i.i.i.i33 = getelementptr inbounds i32, ptr addrspace(1) %23, i64 %add.i.i.i.i.i.i
+  %24 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i.i33, align 4
+  %add4.i.i.i.i.i = add nsw i32 %24, %add.i.i.i.i.i
+  store i32 %add4.i.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i.i33, align 4
+  %add.i.i.i35 = add i64 %storemerge.i.i.i29, %6
+  br label %for.cond.i.i.i28
+
+lexit12: ; preds = %for.cond.i.i.i28
+  %add.i.i31 = add i64 %storemerge.i.i26, %5
+  br label %for.cond.i.i25
+
+lexit13: ; preds = %for.cond.i.i25
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz32.i, label %wg_leader19.i, label %wg_cf20.i
+
+wg_leader19.i:                                    ; preds = %lexit13
+  %25 = load i32, ptr addrspace(3) @GCnt4, align 4
+  %inc.i = add nsw i32 %25, 1
+  store i32 %inc.i, ptr addrspace(3) @GCnt4, align 4
+  br label %wg_cf20.i
+
+wg_cf20.i:                                        ; preds = %wg_leader19.i, %lexit13
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br label %for.cond.i
+
+for.end.i:                                        ; preds = %wg_cf11.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz32.i, label %wg_leader22.i, label %lexit14
+
+wg_leader22.i:                                    ; preds = %for.end.i
+  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %priv.i)
+  br label %lexit14
+
+lexit14: ; preds = %wg_leader22.i, %for.end.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %agg.tmp67)
+  ret void
+}
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @test3(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) {
+entry: ; Kernel with an outer loop whose condition is written only by the work-item with local invocation index 0, and an inner loop run by every work-item.
+  %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8
+  %0 = load i64, ptr %_arg_dev_ptr1, align 8 ; first i64 of each byval argument is read here and on the next two lines
+  %1 = load i64, ptr %_arg_dev_ptr2, align 8
+  %2 = load i64, ptr %_arg_dev_ptr3, align 8
+  store i64 %2, ptr addrspace(3) @GKernel4, align 8
+  store i64 %0, ptr addrspace(3) undef, align 8 ; the pointer operand is undef in this test input (also for the two undef stores below)
+  store i64 %1, ptr addrspace(3) undef, align 8
+  %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+  store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+  %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32 ; %3..%6 are stored into %agg.tmp67 at byte offsets 0, 8, 16 and 24 below
+  %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+  %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+  %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67)
+  store i64 %3, ptr %agg.tmp67, align 1
+  %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+  store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+  store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+  store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+  %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; Itanium-mangled __spirv_ControlBarrier taking three unsigned ints
+  %cmpz16.i = icmp eq i64 %7, 0 ; true only when the loaded local invocation index is 0; selects the "leader" paths below
+  br i1 %cmpz16.i, label %leader.i, label %merge.i
+
+leader.i:                                         ; preds = %entry
+  call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.21, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false) ; leader copies its 32-byte %agg.tmp67 into @ArgShadow.21
+  br label %merge.i
+
+merge.i:                                          ; preds = %leader.i, %entry
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.21, i64 32, i1 false) ; every path copies @ArgShadow.21 back into its own %agg.tmp67
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz16.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i:                                      ; preds = %merge.i
+  %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+  store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast5, align 8 ; leader publishes the casted pointer; it is reloaded in %wg_cf.i
+  store i32 0, ptr addrspace(3) @GCnt5, align 4 ; outer-loop counter starts at 0
+  br label %wg_cf.i
+
+wg_cf.i:                                          ; preds = %wg_leader.i, %merge.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_g.ascast.i = load ptr addrspace(4), ptr addrspace(3) @GAsCast5, align 8
+  %8 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 ; initial value of the inner-loop induction variable
+  %9 = trunc i64 %4 to i32
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %wg_cf12.i, %wg_cf.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz16.i, label %wg_leader5.i, label %wg_cf6.i
+
+wg_leader5.i:                                     ; preds = %for.cond.i
+  %10 = load i32, ptr addrspace(3) @GCnt5, align 4
+  %cmp.i = icmp slt i32 %10, 2 ; outer loop continues while @GCnt5 < 2
+  store i1 %cmp.i, ptr addrspace(3) @GCmp5, align 1 ; the result is passed to all paths through @GCmp5
+  br label %wg_cf6.i
+
+wg_cf6.i:                                         ; preds = %wg_leader5.i, %for.cond.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp5, align 1
+  br i1 %wg_val_cmp.i, label %for.body.i, label %lexit20 ; %lexit20 is the only block with a ret
+
+for.body.i:                                       ; preds = %wg_cf6.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz16.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i:                                        ; preds = %for.body.i
+  store ptr addrspace(4) %wg_val_g.ascast.i, ptr addrspace(3) @WGCopy.20.0, align 8 ; only the leader path writes the @WGCopy.* slots
+  store ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel4 to ptr addrspace(4)), ptr addrspace(3) @WGCopy.20.1, align 8
+  store i64 5, ptr addrspace(3) @WGCopy.19.0, align 8 ; becomes the inner-loop bound %11 after the barrier
+  br label %LeaderMat.i
+
+LeaderMat.i:                                      ; preds = %TestMat.i, %for.body.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %11 = load i64, ptr addrspace(3) @WGCopy.19.0, align 8
+  %agg.tmp2.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.20.0, align 8
+  %agg.tmp2.i.sroa.6.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.20.1, align 8
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  %index.i.i.i.i.i = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, i64 24 ; i8-typed GEPs at byte offset 24 from the two reloaded pointers
+  %12 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.6.0.copyload, i64 24
+  %13 = trunc i64 %11 to i32
+  br label %for.cond.i.i
+
+for.cond.i.i:                                     ; preds = %for.body.i.i, %LeaderMat.i
+  %storemerge.i.i = phi i64 [ %8, %LeaderMat.i ], [ %add.i.i, %for.body.i.i ] ; inner induction variable: starts at %8, stepped by %4 in the body
+  %cmp.i.i = icmp ult i64 %storemerge.i.i, %11
+  br i1 %cmp.i.i, label %for.body.i.i, label %lexit21
+
+for.body.i.i:                                     ; preds = %for.cond.i.i
+  %14 = load i64, ptr addrspace(4) %index.i.i.i.i.i, align 8
+  %mul.i.i.i.i = mul i64 %14, 10
+  %mul3.i.i.i.i = shl i64 %storemerge.i.i, 1
+  %add.i.i.i.i = add i64 %mul.i.i.i.i, %mul3.i.i.i.i ; element index = %14 * 10 + induction variable * 2
+  %15 = load ptr addrspace(1), ptr addrspace(4) %12, align 8 ; a ptr addrspace(1) is loaded through the generic pointer %12
+  %arrayidx.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %15, i64 %add.i.i.i.i
+  %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+  %conv9.i.i.i.i = add i32 %16, %13 ; element at the computed index is incremented by trunc(%11)
+  store i32 %conv9.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+  %add14.i.i.i.i = or disjoint i64 %add.i.i.i.i, 1
+  %17 = load ptr addrspace(1), ptr addrspace(4) %12, align 8
+  %arrayidx.i25.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %17, i64 %add14.i.i.i.i
+  %18 = load i32, ptr addrspace(1) %arrayidx.i25.i.i.i.i, align 4
+  %conv18.i.i.i.i = add i32 %18, %9 ; element at (index | 1) is incremented by trunc(%4)
+  store i32 %conv18.i.i.i.i, ptr addrspace(1) %arrayidx.i25.i.i.i.i, align 4
+  %add.i.i = add i64 %storemerge.i.i, %4
+  br label %for.cond.i.i
+
+lexit21: ; preds = %for.cond.i.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz16.i, label %wg_leader11.i, label %wg_cf12.i
+
+wg_leader11.i:                                    ; preds = %lexit21
+  %19 = load i32, ptr addrspace(3) @GCnt5, align 4
+  %inc.i = add nsw i32 %19, 1 ; leader advances the outer-loop counter @GCnt5
+  store i32 %inc.i, ptr addrspace(3) @GCnt5, align 4
+  br label %wg_cf12.i
+
+wg_cf12.i:                                        ; preds = %wg_leader11.i, %lexit21
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br label %for.cond.i
+
+lexit20: ; preds = %wg_cf6.i
+  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67)
+  ret void
+}

>From 4f1ca5c70afd571a2d7d7dcac6c245d77cfde928 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 27 Nov 2024 11:54:53 -0800
Subject: [PATCH 15/17] add tests

---
 .../CodeGen/SPIRV/pointers/gep-types-1.ll     | 42 +++++++++++++++++++
 .../CodeGen/SPIRV/pointers/gep-types-2.ll     | 36 ++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll

diff --git a/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll b/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll
new file mode 100644
index 00000000000000..0e2730e18bf382
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll
@@ -0,0 +1,42 @@
+; The goal of the test is to ensure that type inference doesn't break validity of the generated SPIR-V code.
+; Pass criteria: the CHECK lines below match the llc output and, when spirv-tools is available, spirv-val considers the output valid.
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#Char:]] = OpTypeInt 8 0
+; CHECK-DAG: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]]
+; CHECK-DAG: %[[#PtrCharCW:]] = OpTypePointer CrossWorkgroup %[[#Char]]
+; CHECK-DAG: %[[#PtrCharGen:]] = OpTypePointer Generic %[[#Char]]
+; CHECK-DAG: %[[#Struct:]] = OpTypeStruct %[[#]] %[[#]] %[[#]]
+; CHECK-DAG: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]]
+; CHECK-DAG: %[[#PtrPtrCharGen:]] = OpTypePointer Function %[[#PtrCharGen]]
+; CHECK-DAG: %[[#PtrStruct:]] = OpTypePointer Function %[[#Struct]]
+; CHECK: OpFunction
+; CHECK: %[[#Arg1:]] = OpFunctionParameter %[[#Int]]
+; CHECK: %[[#Arg2:]] = OpFunctionParameter %[[#PtrCharCW]]
+; CHECK: %[[#Kernel:]] = OpVariable %[[#PtrStruct]] Function
+; CHECK: %[[#IntKernel:]] = OpBitcast %[[#PtrInt]] %[[#Kernel]]
+; CHECK: OpStore %[[#IntKernel]] %[[#Arg1]]
+; CHECK: %[[#CharKernel:]] = OpBitcast %[[#PtrChar]] %[[#Kernel]]
+; CHECK: %[[#P:]] = OpInBoundsPtrAccessChain %[[#PtrChar]] %[[#CharKernel]] %[[#]]
+; CHECK: %[[#R0:]] = OpPtrCastToGeneric %[[#PtrCharGen]] %[[#Arg2]]
+; CHECK: %[[#P2:]] = OpBitcast %[[#PtrPtrCharGen]] %[[#P]]
+; CHECK: OpStore %[[#P2]] %[[#R0]]
+; CHECK: %[[#P3:]] = OpBitcast %[[#PtrPtrCharGen]] %[[#P]]
+; CHECK: %[[#]] = OpLoad %[[#PtrCharGen]] %[[#P3]]
+
+%"class.std::complex" = type { { double, double } }
+%class.anon = type { i32, ptr addrspace(4), [2 x [2 x %"class.std::complex"]] }
+
+define weak_odr dso_local spir_kernel void @foo(i32 noundef %_arg_N, ptr addrspace(1) noundef align 8 %_arg_p) {
+entry: ; Kernel under test: an i32 store, an i8-typed GEP, and a pointer store/load, all through the same alloca.
+  %Kernel = alloca %class.anon, align 8
+  store i32 %_arg_N, ptr %Kernel, align 8 ; CHECK lines expect the alloca to be bitcast to an i32 pointer before this store
+  %p = getelementptr inbounds i8, ptr %Kernel, i64 8 ; byte offset 8 from the alloca; CHECK lines expect an access chain on an i8 pointer
+  %r0 = addrspacecast ptr addrspace(1) %_arg_p to ptr addrspace(4) ; matched by OpPtrCastToGeneric in the CHECK lines
+  store ptr addrspace(4) %r0, ptr %p, align 8 ; CHECK lines expect %p to be bitcast to a pointer to a Generic i8 pointer first
+  %r3 = load ptr addrspace(4), ptr %p, align 8 ; CHECK lines expect a second bitcast of %p for the load; %r3 has no uses
+  ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll b/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll
new file mode 100644
index 00000000000000..d94da31890ab17
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll
@@ -0,0 +1,36 @@
+; The goal of the test is to ensure that type inference doesn't break validity of the generated SPIR-V code.
+; The only pass criterion is that spirv-val considers output valid.
+
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpFunction
+
+%class.anon = type { i32, ptr addrspace(4)}
+
+define weak_odr dso_local spir_kernel void @foo(i32 noundef %_arg_N, i1 %fl) {
+entry: ; Kernel under test: a pointer phi fed by i8-typed GEPs off an alloca and used as the address of a double load.
+  %__SYCLKernel = alloca %class.anon, align 8
+  store i32 %_arg_N, ptr %__SYCLKernel, align 8
+  br label %arinit
+
+arinit:
+  %scevgep3 = getelementptr nuw i8, ptr %__SYCLKernel, i64 24 ; byte offset 24 from the alloca; initial value of the pointer phi
+  br label %for.cond.i
+
+for.cond.i:
+  %lsr.iv4 = phi ptr [ %scevgep5, %for.body.i ], [ %scevgep3, %arinit ] ; pointer carried around the loop
+  br i1 %fl, label %for.body.i, label %exit ; the loop body is entered whenever %fl is true
+
+for.body.i:
+  %scevgep6 = getelementptr i8, ptr %lsr.iv4, i64 -8 ; result has no uses in this function
+  %_M_value.imag.i.i = load double, ptr %lsr.iv4, align 8 ; the phi pointer is accessed as double; the loaded value is unused
+  %scevgep5 = getelementptr i8, ptr %lsr.iv4, i64 32 ; advances the pointer by 32 bytes for the next iteration
+  br label %for.cond.i
+
+exit:
+  ret void
+}

>From 15aec203682bd0d8a6a413f103fdf2b6175614a6 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Wed, 27 Nov 2024 12:07:11 -0800
Subject: [PATCH 16/17] remove comments

---
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 53 -------------------
 llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp   | 10 ++--
 2 files changed, 5 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 20bd9fe4fa72ce..a323f5d01b3e66 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -509,27 +509,6 @@ void SPIRVEmitIntrinsics::propagateElemTypeRec(
       continue;
     if (!VisitedSubst.insert(std::make_pair(U, Op)).second)
       continue;
-    /*
-    if (auto *Ref = dyn_cast<GetElementPtrInst>(U)) {
-      CallInst *AssignCI = GR->findAssignPtrTypeInstr(Ref);
-      if (AssignCI && Ref->getPointerOperand() == Op) {
-        Type *PrevElemTy = GR->findDeducedElementType(Ref);
-        assert(PrevElemTy && "Expected valid element type");
-        // evaluate a new GEP type
-        Type *NewElemTy = PtrElemTy;
-        for (Use &RefUse : drop_begin(Ref->indices()))
-          NewElemTy =
-              GetElementPtrInst::getTypeAtIndex(NewElemTy, RefUse.get());
-        // record the new GEP type
-        assert(NewElemTy && "Expected valid GEP indices");
-        updateAssignType(AssignCI, Ref, PoisonValue::get(NewElemTy));
-        // recursively propagate change
-        propagateElemTypeRec(Ref, NewElemTy, PrevElemTy, VisitedSubst, Visited,
-                             Ptrcasts);
-      }
-      continue;
-    }
-    */
     Instruction *UI = dyn_cast<Instruction>(U);
     // If the instruction was validated already, we need to keep it valid by
     // keeping current Op type.
@@ -652,25 +631,6 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
     } else {
       Ty = Ref->getResultElementType();
     }
-    /*
-    if (Type *PtrElemTy = GR->findDeducedElementType(Ref->getPointerOperand()))
-    { Ty = PtrElemTy; for (Use &U : drop_begin(Ref->indices())) Ty =
-    GetElementPtrInst::getTypeAtIndex(Ty, U.get()); if
-    (isTodoType(Ref->getPointerOperand())) insertTodoType(Ref); } else if
-    (isNestedPointer(Ref->getSourceElementType())) { Ty =
-    Ref->getSourceElementType(); for (Use &U : drop_begin(Ref->indices())) Ty =
-    GetElementPtrInst::getTypeAtIndex(Ty, U.get()); } else { Ty =
-    Ref->getResultElementType();
-    }
-    */
-    /*
-    if (isNestedPointer(Ref->getSourceElementType())) {
-      Type *PtrElemTy = GR->findDeducedElementType(Ref->getPointerOperand());
-      Ty = PtrElemTy ? PtrElemTy : Ref->getSourceElementType();
-      for (Use &U : drop_begin(Ref->indices()))
-        Ty = GetElementPtrInst::getTypeAtIndex(Ty, U.get());
-    }
-    */
   } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
     Value *Op = Ref->getPointerOperand();
     Type *KnownTy = GR->findDeducedElementType(Op);
@@ -1493,22 +1453,18 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
   if (SI) {
     Value *Op = SI->getValueOperand();
     Value *Pointer = SI->getPointerOperand();
-    // if (!GR->findDeducedElementType(Pointer) || isTodoType(Pointer)) {
     Type *OpTy = Op->getType();
     if (auto *OpI = dyn_cast<Instruction>(Op))
       OpTy = restoreMutatedType(GR, OpI, OpTy);
     if (OpTy == Op->getType())
       OpTy = deduceElementTypeByValueDeep(OpTy, Op, false);
     replacePointerOperandWithPtrCast(I, Pointer, OpTy, 1, B);
-    //}
     return;
   }
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     Value *Pointer = LI->getPointerOperand();
-    // if (!GR->findDeducedElementType(Pointer) || isTodoType(Pointer)) {
     Type *OpTy = LI->getType();
     if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
-      // TODO: isNestedPointer() instead of dyn_cast<PointerType>
       if (Type *ElemTy = GR->findDeducedElementType(LI)) {
         OpTy = getTypedPointerWrapper(ElemTy, PtrTy->getAddressSpace());
       } else {
@@ -1519,7 +1475,6 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
       }
     }
     replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
-    //}
     return;
   }
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
@@ -2250,14 +2205,6 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
         if (ElemTy != KnownTy) {
           DenseSet<std::pair<Value *, Value *>> VisitedSubst;
           propagateElemType(CI, ElemTy, VisitedSubst);
-          /*
-          if (isa<CallInst>(Op)) {
-            propagateElemType(CI, ElemTy, VisitedSubst);
-          } else {
-            updateAssignType(AssignCI, CI, PoisonValue::get(ElemTy));
-            propagateElemTypeRec(CI, ElemTy, KnownTy, VisitedSubst);
-          }
-          */
           eraseTodoType(Op);
           continue;
         }
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index f67b28f1edd1c8..d5b81bf46c804e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -394,11 +394,11 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const {
       case SPIRV::OpGenericCastToPtr:
         validateAccessChain(STI, MRI, GR, MI);
         break;
-//      case SPIRV::OpPtrAccessChain:
-//      case SPIRV::OpInBoundsPtrAccessChain:
-//        if (MI.getNumOperands() == 4)
-//          validateAccessChain(STI, MRI, GR, MI);
-//        break;
+      case SPIRV::OpPtrAccessChain:
+      case SPIRV::OpInBoundsPtrAccessChain:
+        if (MI.getNumOperands() == 4)
+          validateAccessChain(STI, MRI, GR, MI);
+        break;
 
       case SPIRV::OpFunctionCall:
         // ensure there is no mismatch between actual and expected arg types:

>From b6d8fce5df72abcbf7267e6be105bd533826cbae Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Thu, 28 Nov 2024 04:32:55 -0800
Subject: [PATCH 17/17] try to repair the unexpected code pattern in Duplicate
 Tracker

---
 .../Target/SPIRV/SPIRVDuplicatesTracker.cpp   | 35 +++++++++++++------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
index b82c2538a81368..48df845efd76b1 100644
--- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
@@ -69,16 +69,31 @@ void SPIRVGeneralDuplicatesTracker::buildDepsGraph(
         MachineOperand *RegOp = &VRegDef->getOperand(0);
         if (Reg2Entry.count(RegOp) == 0 &&
             (MI->getOpcode() != SPIRV::OpVariable || i != 3)) {
-          std::string DiagMsg;
-          raw_string_ostream OS(DiagMsg);
-          OS << "Unexpected pattern while building a dependency "
-                "graph.\nInstruction: ";
-          MI->print(OS);
-          OS << "Operand: ";
-          Op.print(OS);
-          OS << "\nOperand definition: ";
-          VRegDef->print(OS);
-          report_fatal_error(DiagMsg.c_str());
+          // try to repair the unexpected code pattern
+          bool IsFixed = false;
+          if (VRegDef->getOpcode() == TargetOpcode::G_CONSTANT &&
+              RegOp->isReg() && MRI.getType(RegOp->getReg()).isScalar()) {
+            const Constant *C = VRegDef->getOperand(1).getCImm();
+            add(C, MI->getParent()->getParent(), RegOp->getReg());
+            auto Iter = CT.Storage.find(C);
+            if (Iter != CT.Storage.end()) {
+              SPIRV::DTSortableEntry &MissedEntry = Iter->second;
+              Reg2Entry[RegOp] = &MissedEntry;
+              IsFixed = true;
+            }
+          }
+          if (!IsFixed) {
+            std::string DiagMsg;
+            raw_string_ostream OS(DiagMsg);
+            OS << "Unexpected pattern while building a dependency "
+                  "graph.\nInstruction: ";
+            MI->print(OS);
+            OS << "Operand: ";
+            Op.print(OS);
+            OS << "\nOperand definition: ";
+            VRegDef->print(OS);
+            report_fatal_error(DiagMsg.c_str());
+          }
         }
         if (Reg2Entry.count(RegOp))
           E->addDep(Reg2Entry[RegOp]);



More information about the llvm-commits mailing list