[llvm] FEAT : Added supoort for the extenion SPV_INTEL_fpga_memory_acesses (PR #133210)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 26 22:41:51 PDT 2025


https://github.com/sumesh-s-mcw created https://github.com/llvm/llvm-project/pull/133210

None

>From 3a9338d7fc1691457ed146f456ac8380a08e05ee Mon Sep 17 00:00:00 2001
From: sumesh-s-mcw <sumesh.suresh at multicorewareinc.com>
Date: Fri, 21 Mar 2025 11:01:00 +0530
Subject: [PATCH] FEAT : Added supoort for the extenion
 SPV_INTEL_fpga_memory_acesses

---
 llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp    |   2 +
 llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp |   5 +
 .../Target/SPIRV/SPIRVPrepareFunctions.cpp    | 143 ++++++++-
 .../lib/Target/SPIRV/SPIRVSymbolicOperands.td |   6 +-
 .../IntelFPGAMemoryAccesses.ll                | 273 ++++++++++++++++++
 .../fpga_lsu_function_call.ll                 |  36 +++
 .../intel_fpga_lsu_optimized.ll               | 152 ++++++++++
 7 files changed, 604 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/IntelFPGAMemoryAccesses.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/fpga_lsu_function_call.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/intel_fpga_lsu_optimized.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 37119bf01545c..b6da420910673 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -89,6 +89,8 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
          SPIRV::Extension::Extension::SPV_KHR_cooperative_matrix},
         {"SPV_KHR_non_semantic_info",
          SPIRV::Extension::Extension::SPV_KHR_non_semantic_info},
+        {"SPV_INTEL_fpga_memory_accesses",
+         SPIRV::Extension::Extension::SPV_INTEL_fpga_memory_accesses},
         {"SPV_INTEL_long_composites",
          SPIRV::Extension::Extension::SPV_INTEL_long_composites},
         {"SPV_INTEL_fp_max_error",
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index acc8c014cb26b..14a8aee845d38 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -893,6 +893,11 @@ static void addOpDecorateReqs(const MachineInstr &MI, unsigned DecIndex,
   } else if (Dec == SPIRV::Decoration::FPMaxErrorDecorationINTEL) {
     Reqs.addRequirements(SPIRV::Capability::FPMaxErrorINTEL);
     Reqs.addExtension(SPIRV::Extension::SPV_INTEL_fp_max_error);
+  } else if (Dec == SPIRV::Decoration::BurstCoalesceINTEL ||
+             Dec == SPIRV::Decoration::CacheSizeINTEL ||
+             Dec == SPIRV::Decoration::DontStaticallyCoalesceINTEL ||
+             Dec == SPIRV::Decoration::PrefetchINTEL) {
+    Reqs.addExtension(SPIRV::Extension::SPV_INTEL_fpga_memory_accesses);
   }
 }
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 028699e56a946..1fac5b03630d0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -91,11 +91,62 @@ static Function *getOrCreateFunction(Module *M, Type *RetTy,
   return NewF;
 }
 
+enum IntelFPGAMemoryAccessesVal {
+  BurstCoalesce = 0x1,
+  CacheSizeFlag = 0x2,
+  DontStaticallyCoalesce = 0x4,
+  PrefetchFlag = 0x8
+};
+
+using DecorationsInfoVec =
+    std::vector<std::pair<unsigned, std::vector<std::string>>>;
+
+struct IntelLSUControlsInfo {
+  void setWithBitMask(unsigned ParamsBitMask) {
+    if (ParamsBitMask & IntelFPGAMemoryAccessesVal::BurstCoalesce)
+      BurstCoalesce = true;
+    if (ParamsBitMask & IntelFPGAMemoryAccessesVal::CacheSizeFlag)
+      CacheSizeInfo = 0;
+    if (ParamsBitMask & IntelFPGAMemoryAccessesVal::DontStaticallyCoalesce)
+      DontStaticallyCoalesce = true;
+    if (ParamsBitMask & IntelFPGAMemoryAccessesVal::PrefetchFlag)
+      PrefetchInfo = 0;
+  }
+
+  DecorationsInfoVec getDecorationsFromCurrentState() {
+    DecorationsInfoVec ResultVec;
+    if (BurstCoalesce)
+      ResultVec.emplace_back(SPIRV::Decoration::Decoration::BurstCoalesceINTEL,
+                             std::vector<std::string>());
+    if (DontStaticallyCoalesce)
+      ResultVec.emplace_back(
+          SPIRV::Decoration::Decoration::DontStaticallyCoalesceINTEL,
+          std::vector<std::string>());
+
+    if (CacheSizeInfo.has_value()) {
+      ResultVec.emplace_back(
+          SPIRV::Decoration::Decoration::CacheSizeINTEL,
+          std::vector<std::string>{std::to_string(CacheSizeInfo.value())});
+    }
+    if (PrefetchInfo.has_value()) {
+      ResultVec.emplace_back(
+          SPIRV::Decoration::Decoration::PrefetchINTEL,
+          std::vector<std::string>{std::to_string(PrefetchInfo.value())});
+    }
+    return ResultVec;
+  }
+
+  bool BurstCoalesce = false;
+  std::optional<unsigned> CacheSizeInfo;
+  bool DontStaticallyCoalesce = false;
+  std::optional<unsigned> PrefetchInfo;
+};
+
 static bool lowerIntrinsicToFunction(IntrinsicInst *Intrinsic) {
-  // For @llvm.memset.* intrinsic cases with constant value and length arguments
-  // are emulated via "storing" a constant array to the destination. For other
-  // cases we wrap the intrinsic in @spirv.llvm_memset_* function and expand the
-  // intrinsic to a loop via expandMemSetAsLoop().
+  // For @llvm.memset.* intrinsic cases with constant value and length
+  // arguments are emulated via "storing" a constant array to the destination.
+  // For other cases we wrap the intrinsic in @spirv.llvm_memset_* function
+  // and expand the intrinsic to a loop via expandMemSetAsLoop().
   if (auto *MSI = dyn_cast<MemSetInst>(Intrinsic))
     if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
       return false; // It is handled later using OpCopyMemorySized.
@@ -243,7 +294,7 @@ static SmallVector<Metadata *> parseAnnotation(Value *I,
                                                 : SmallVector<Metadata *>{};
 }
 
-static void lowerPtrAnnotation(IntrinsicInst *II) {
+static void lowerPtrAnnotation(IntrinsicInst *II, const SPIRVSubtarget &STI) {
   LLVMContext &Ctx = II->getContext();
   Type *Int32Ty = Type::getInt32Ty(Ctx);
 
@@ -256,10 +307,77 @@ static void lowerPtrAnnotation(IntrinsicInst *II) {
   std::string Anno =
       getAnnotation(II->getArgOperand(1),
                     4 < II->arg_size() ? II->getArgOperand(4) : nullptr);
+  // messed code will correct it once it is working
+  // PARSE THE ANOTATION
+  std::regex DecorationRegex("\\{\\w([\\w:,-]|\"[^\"]*\")*\\}");
+  using RegexIterT = std::sregex_iterator;
+  RegexIterT DecorationsIt(Anno.cbegin(), Anno.cend(), DecorationRegex);
+  RegexIterT DecorationsEnd;
+  IntelLSUControlsInfo LSUControls;
+  for (; DecorationsIt != DecorationsEnd; ++DecorationsIt) {
+    std::smatch Match = *DecorationsIt;
+    std::string DecorationStr = Match.str();
+    std::string AnnotatedDecoration =
+        DecorationStr.substr(1, DecorationStr.length() - 2);
+    llvm::StringRef AnnotatedRef(AnnotatedDecoration);
+    std::pair<llvm::StringRef, llvm::StringRef> Split = AnnotatedRef.split(':');
+    llvm::StringRef Name = Split.first, ValueStr = Split.second;
+
+    bool canUseFPGA = STI.canUseExtension(
+        SPIRV::Extension::Extension::SPV_INTEL_fpga_memory_accesses);
+    if (canUseFPGA) {
+      if (Name == "params") {
+        unsigned ParamsBitMask = 0;
+        bool Failure = ValueStr.getAsInteger(10, ParamsBitMask);
+        assert(!Failure && "Non-integer LSU controls value");
+        (void)Failure;
+        LSUControls.setWithBitMask(ParamsBitMask);
+      } else if (Name == "cache-size") {
+        if (!LSUControls.CacheSizeInfo.has_value())
+          continue;
+        unsigned CacheSizeValue = 0;
+        bool Failure = ValueStr.getAsInteger(10, CacheSizeValue);
+        assert(!Failure && "Non-integer cache size value");
+        (void)Failure;
+        LSUControls.CacheSizeInfo = CacheSizeValue;
+      }
+    }
+  }
+
+  DecorationsInfoVec currentDecorations =
+      LSUControls.getDecorationsFromCurrentState();
 
-  // Parse the annotation.
   SmallVector<Metadata *> MDs = parseAnnotation(II, Anno, Ctx, Int32Ty);
 
+  for (const auto &Dec : currentDecorations) {
+    unsigned DecKind = Dec.first;
+    const std::vector<std::string> &DecValues = Dec.second;
+    SmallVector<Metadata *> metaDataItem;
+    auto Decoration = ConstantAsMetadata::get(
+        ConstantInt::get(Int32Ty, static_cast<uint32_t>(DecKind)));
+    metaDataItem.push_back(Decoration);
+    if (!DecValues.empty()) {
+      for (const auto &val : DecValues) {
+        int32_t numValue;
+        if (llvm::to_integer(val, numValue, 10)) {
+          metaDataItem.push_back(
+              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, numValue)));
+        } else {
+          metaDataItem.push_back(MDString::get(Ctx, val));
+        }
+      }
+    }
+    MDs.push_back(MDNode::get(Ctx, metaDataItem));
+
+  }
+
+  // MDs.push_back(ConstantAsMetadata::get(
+  //     ConstantInt::get(Type::getInt32Ty(Ctx), DecKind)));
+  // for (const std::string &Value : DecValues) {
+  //   MDs.push_back(MDString::get(Ctx, Value));
+  // }
+  //}
+
   // If the annotation string is not parsed successfully we don't know the
   // format used and output it as a general UserSemantic decoration.
   // Otherwise MDs is a Metadata tuple (a decoration list) in the format
@@ -281,9 +399,9 @@ static void lowerPtrAnnotation(IntrinsicInst *II) {
 
 static void lowerFunnelShifts(IntrinsicInst *FSHIntrinsic) {
   // Get a separate function - otherwise, we'd have to rework the CFG of the
-  // current one. Then simply replace the intrinsic uses with a call to the new
-  // function.
-  // Generate LLVM IR for  i* @spirv.llvm_fsh?_i* (i* %a, i* %b, i* %c)
+  // current one. Then simply replace the intrinsic uses with a call to the
+  // new function. Generate LLVM IR for  i* @spirv.llvm_fsh?_i* (i* %a, i* %b,
+  // i* %c)
   Module *M = FSHIntrinsic->getModule();
   FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType();
   Type *FSHRetTy = FSHFuncTy->getReturnType();
@@ -330,8 +448,8 @@ static void lowerFunnelShifts(IntrinsicInst *FSHIntrinsic) {
     // the LSBs.
     SecShift = IRB.CreateShl(FSHFunc->getArg(0), SubRotateVal);
   } else {
-    // ...and right-shift the less significant int by this number, zero-filling
-    // the MSBs.
+    // ...and right-shift the less significant int by this number,
+    // zero-filling the MSBs.
     SecShift = IRB.CreateLShr(FSHFunc->getArg(1), SubRotateVal);
   }
   // A simple binary addition of the shifted ints yields the final result.
@@ -420,7 +538,8 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
             II, Intrinsic::SPVIntrinsics::spv_lifetime_end, {1});
         break;
       case Intrinsic::ptr_annotation:
-        lowerPtrAnnotation(II);
+        const SPIRVSubtarget &STI = TM.getSubtarget<SPIRVSubtarget>(*F);
+        lowerPtrAnnotation(II, STI);
         Changed = true;
         break;
       }
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index caee778eddbc4..5bbe6b9e86601 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -513,6 +513,7 @@ defm LongCompositesINTEL : CapabilityOperand<6089, 0, 0, [SPV_INTEL_long_composi
 defm BindlessImagesINTEL : CapabilityOperand<6528, 0, 0, [SPV_INTEL_bindless_images], []>;
 defm MemoryAccessAliasingINTEL : CapabilityOperand<5910, 0, 0, [SPV_INTEL_memory_access_aliasing], []>;
 defm FPMaxErrorINTEL : CapabilityOperand<6169, 0, 0, [SPV_INTEL_fp_max_error], []>;
+defm FPGAMemoryAccessesINTEL : CapabilityOperand<5898, 0, 0, [SPV_INTEL_fpga_memory_accesses], []>;
 
 //===----------------------------------------------------------------------===//
 // Multiclass used to define SourceLanguage enum values and at the same time
@@ -1264,7 +1265,10 @@ defm FunctionFloatingPointModeINTEL : DecorationOperand<6080, 0, 0, [], [Functio
 defm AliasScopeINTEL : DecorationOperand<5914, 0, 0, [], [MemoryAccessAliasingINTEL]>;
 defm NoAliasINTEL : DecorationOperand<5915, 0, 0, [], [MemoryAccessAliasingINTEL]>;
 defm FPMaxErrorDecorationINTEL : DecorationOperand<6170, 0, 0, [], [FPMaxErrorINTEL]>;
-
+defm BurstCoalesceINTEL : DecorationOperand<5899, 0, 0, [], [FPGAMemoryAccessesINTEL]>;
+defm CacheSizeINTEL : DecorationOperand<5900, 0, 0, [], [FPGAMemoryAccessesINTEL]>;
+defm DontStaticallyCoalesceINTEL  : DecorationOperand<5901, 0, 0, [], [FPGAMemoryAccessesINTEL]>;
+defm PrefetchINTEL : DecorationOperand<5902, 0, 0, [], [FPGAMemoryAccessesINTEL]>;
 //===----------------------------------------------------------------------===//
 // Multiclass used to define BuiltIn enum values and at the same time
 // SymbolicOperand entries with string mnemonics, versioning, extensions and
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/IntelFPGAMemoryAccesses.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/IntelFPGAMemoryAccesses.ll
new file mode 100644
index 0000000000000..0ad6e84db9926
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/IntelFPGAMemoryAccesses.ll
@@ -0,0 +1,273 @@
+; LLVM IR generated by Intel SYCL Clang compiler (https://github.com/intel/llvm)
+; SYCL source code can be found below:
+
+; #define BURST_COAL 0x1
+; #define CACHE_SIZE_FLAG 0x2
+; #define DONT_STATICALLY_COAL 0x4
+; #define PREFETCH 0x8
+; struct State {
+;   float Field1;
+;   int Field2;
+; };
+;
+; void foo(float *A, int *B, State *C) {
+;   float *x;
+;   int *y;
+;   State *z;
+;   double *t;
+;   x = __builtin_intel_fpga_mem(A, BURST_COAL | CACHE_SIZE_FLAG, 0);
+;   y = __builtin_intel_fpga_mem(B, DONT_STATICALLY_COAL | PREFETCH, 0);
+;   z = __builtin_intel_fpga_mem(C, CACHE_SIZE_FLAG, 127);
+;   x = __builtin_intel_fpga_mem(&C->Field1, BURST_COAL | CACHE_SIZE_FLAG, 127);
+;   y = __builtin_intel_fpga_mem(&C->Field2, 0, 127);
+;   z = __builtin_intel_fpga_mem(C, BURST_COAL | CACHE_SIZE_FLAG | DONT_STATICALLY_COAL | PREFETCH, 127);
+;   t = __builtin_intel_fpga_mem((double *) A, BURST_COAL | CACHE_SIZE_FLAG, 0);
+;   *__builtin_intel_fpga_mem(A, BURST_COAL | CACHE_SIZE_FLAG, 0) = 5;
+;   int s = *__builtin_intel_fpga_mem(B, DONT_STATICALLY_COAL | PREFETCH, 0);
+; }
+;
+; template <typename name, typename Func>
+; __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) {
+;   kernelFunc();
+; }
+;
+; int main() {
+;   kernel_single_task<class fake_kernel>([]() {
+;     float *A;
+;     int *B;
+;     State *C;
+;     foo(A, B, C);
+;   });
+;   return 0;
+; }
+
+; RUN: llc -O0 -verify-machineinstrs  -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_fpga_memory_accesses %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV
+
+; CHECK-SPIRV: OpCapability FPGAMemoryAccessesINTEL
+; CHECK-SPIRV: Extension "SPV_INTEL_fpga_memory_accesses"
+; Check that the semantically meaningless decoration was
+; translated as a mere annotation
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} UserSemantic "{params:0}{cache-size:127}"
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} BurstCoalesceINTEL
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} CacheSizeINTEL 0
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} CacheSizeINTEL 127
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} DontStaticallyCoalesceINTEL
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} PrefetchINTEL 0
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%"class._ZTSZ4mainE3$_0.anon" = type { i8 }
+%struct._ZTS5State.State = type { float, i32 }
+
+; CHECK-LLVM: [[PARAM_3_CACHE_0:@[a-z0-9_.]+]] = {{.*}}{params:3}{cache-size:0}
+ at .str = private unnamed_addr addrspace(1) constant [25 x i8] c"{params:3}{cache-size:0}\00", section "llvm.metadata"
+ at .str.1 = private unnamed_addr addrspace(1) constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata"
+; "params" bitmask doesn't hold 0x2 (cache size ON), so cache-size can be dropped
+; during translation
+; CHECK-LLVM: [[PARAM_12_CACHE_0:@[a-z0-9_.]+]] = {{.*}}{params:12}
+ at .str.2 = private unnamed_addr addrspace(1) constant [26 x i8] c"{params:12}{cache-size:0}\00", section "llvm.metadata"
+; CHECK-LLVM: [[PARAM_2_CACHE_127:@[a-z0-9_.]+]] = {{.*}}{params:2}{cache-size:127}
+ at .str.3 = private unnamed_addr addrspace(1) constant [27 x i8] c"{params:2}{cache-size:127}\00", section "llvm.metadata"
+; CHECK-LLVM: [[PARAM_3_CACHE_127:@[a-z0-9_.]+]] = {{.*}}{params:3}{cache-size:127}
+ at .str.4 = private unnamed_addr addrspace(1) constant [27 x i8] c"{params:3}{cache-size:127}\00", section "llvm.metadata"
+; Since "params" bitmask is set to 0, the next string isn't required to be preserved
+; during translation. Neither is the corresponding pointer annotation intrinsic.
+ at .str.5 = private unnamed_addr addrspace(1) constant [27 x i8] c"{params:0}{cache-size:127}\00", section "llvm.metadata"
+; CHECK-LLVM: [[PARAM_15_CACHE_127:@[a-z0-9_.]+]] = {{.*}}{params:15}{cache-size:127}
+ at .str.6 = private unnamed_addr addrspace(1) constant [28 x i8] c"{params:15}{cache-size:127}\00", section "llvm.metadata"
+
+; Function Attrs: norecurse nounwind
+define spir_kernel void @_ZTSZ4mainE11fake_kernel() #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 {
+entry:
+  %0 = alloca %"class._ZTSZ4mainE3$_0.anon", align 1
+  %1 = bitcast %"class._ZTSZ4mainE3$_0.anon"* %0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %1) #5
+  %2 = addrspacecast %"class._ZTSZ4mainE3$_0.anon"* %0 to %"class._ZTSZ4mainE3$_0.anon" addrspace(4)*
+  call spir_func void @"_ZZ4mainENK3$_0clEv"(%"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %2)
+  %3 = bitcast %"class._ZTSZ4mainE3$_0.anon"* %0 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %3) #5
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* captures(none)) #1
+
+; Function Attrs: inlinehint norecurse nounwind
+define internal spir_func void @"_ZZ4mainENK3$_0clEv"(%"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this) #2 align 2 {
+entry:
+  %this.addr = alloca %"class._ZTSZ4mainE3$_0.anon" addrspace(4)*, align 8
+  %A = alloca float addrspace(4)*, align 8
+  %B = alloca i32 addrspace(4)*, align 8
+  %C = alloca %struct._ZTS5State.State addrspace(4)*, align 8
+  store %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this, %"class._ZTSZ4mainE3$_0.anon" addrspace(4)** %this.addr, align 8, !tbaa !5
+  %0 = bitcast float addrspace(4)** %A to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) #5
+  %1 = bitcast i32 addrspace(4)** %B to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %1) #5
+  %2 = bitcast %struct._ZTS5State.State addrspace(4)** %C to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %2) #5
+  %3 = load float addrspace(4)*, float addrspace(4)** %A, align 8, !tbaa !5
+  %4 = load i32 addrspace(4)*, i32 addrspace(4)** %B, align 8, !tbaa !5
+  %5 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C, align 8, !tbaa !5
+  call spir_func void @_Z3fooPfPiP5State(float addrspace(4)* %3, i32 addrspace(4)* %4, %struct._ZTS5State.State addrspace(4)* %5)
+  %6 = bitcast %struct._ZTS5State.State addrspace(4)** %C to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %6) #5
+  %7 = bitcast i32 addrspace(4)** %B to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %7) #5
+  %8 = bitcast float addrspace(4)** %A to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %8) #5
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* captures(none)) #1
+
+; CHECK-LLVM: define spir_func void @{{.*}}foo
+; Function Attrs: norecurse nounwind
+define spir_func void @_Z3fooPfPiP5State(float addrspace(4)* %A, i32 addrspace(4)* %B, %struct._ZTS5State.State addrspace(4)* %C) #3 {
+entry:
+; CHECK-LLVM: %[[FLOAT_FUNC_PARAM:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[INT_FUNC_PARAM:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[STRUCT_FUNC_PARAM:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+  %A.addr = alloca float addrspace(4)*, align 8
+  %B.addr = alloca i32 addrspace(4)*, align 8
+  %C.addr = alloca %struct._ZTS5State.State addrspace(4)*, align 8
+; CHECK-LLVM: %[[FLOAT_VAR:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[INT_VAR:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[STRUCT_VAR:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[DOUBLE_VAR:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[INT_VAR_1:[[:alnum:].]+]] = alloca i32, align 4
+  %x = alloca float addrspace(4)*, align 8
+  %y = alloca i32 addrspace(4)*, align 8
+  %z = alloca %struct._ZTS5State.State addrspace(4)*, align 8
+  %t = alloca double addrspace(4)*, align 8
+  %s = alloca i32, align 4
+  store float addrspace(4)* %A, float addrspace(4)** %A.addr, align 8, !tbaa !5
+  store i32 addrspace(4)* %B, i32 addrspace(4)** %B.addr, align 8, !tbaa !5
+  store %struct._ZTS5State.State addrspace(4)* %C, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+  %0 = bitcast float addrspace(4)** %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) #5
+  %1 = bitcast i32 addrspace(4)** %y to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %1) #5
+  %2 = bitcast %struct._ZTS5State.State addrspace(4)** %z to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %2) #5
+; CHECK-LLVM: %[[FLOAT_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[FLOAT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[FLOAT_FUNC_PARAM_LOAD]], ptr @[[PARAM_3_CACHE_0_TODO:]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL]], ptr %[[FLOAT_VAR]]
+  %3 = load float addrspace(4)*, float addrspace(4)** %A.addr, align 8, !tbaa !5
+  %4 = call float addrspace(4)* @llvm.ptr.annotation.p4f32.p1i8(float addrspace(4)* %3, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  store float addrspace(4)* %4, float addrspace(4)** %x, align 8, !tbaa !5
+; CHECK-LLVM: %[[INT_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[INT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[INT_FUNC_PARAM_LOAD]], ptr @[[PARAM_12_CACHE_0:]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL]], ptr %[[INT_VAR]]
+  %5 = load i32 addrspace(4)*, i32 addrspace(4)** %B.addr, align 8, !tbaa !5
+  %6 = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %5, i8 addrspace(1)* getelementptr inbounds ([26 x i8], [26 x i8] addrspace(1)* @.str.2, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  store i32 addrspace(4)* %6, i32 addrspace(4)** %y, align 8, !tbaa !5
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD:[0-9]+]] = [[WHOLE_STRUCT_LOAD_INST:load\ ptr\ addrspace\(4\),\ ptr.*]][[STRUCT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[WHOLE_STRUCT_LOAD]], ptr @[[PARAM_2_CACHE_127:]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL]], ptr %[[STRUCT_VAR]]
+  %7 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+  %8 = call %struct._ZTS5State.State addrspace(4)* @llvm.ptr.annotation.p4s_struct._ZTS5State.States(%struct._ZTS5State.State addrspace(4)* %7, i8 addrspace(1)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(1)* @.str.3, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  store %struct._ZTS5State.State addrspace(4)* %8, %struct._ZTS5State.State addrspace(4)** %z, align 8, !tbaa !5
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD_FOR_FLOAT:[0-9]+]] = [[WHOLE_STRUCT_LOAD_INST]][[STRUCT_FUNC_PARAM]]
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD_FOR_FLOAT_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[WHOLE_STRUCT_LOAD_FOR_FLOAT]]
+; CHECK-LLVM: %[[FLOAT_FIELD_GEP:[[:alnum:].]+]] = getelementptr inbounds %struct{{.*}}State, ptr addrspace(4) %[[WHOLE_STRUCT_LOAD_FOR_FLOAT_BC]], i32 0, i32 0
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[FLOAT_FIELD_GEP]], ptr @[[PARAM_3_CACHE_127:]]
+; CHECK-LLVM: %[[INTRINSIC_CALL_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[INTRINSIC_CALL]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL_BC]], ptr %[[FLOAT_VAR]]
+  %9 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+  %Field1 = getelementptr inbounds %struct._ZTS5State.State, %struct._ZTS5State.State addrspace(4)* %9, i32 0, i32 0
+  %10 = call float addrspace(4)* @llvm.ptr.annotation.p4f32.p1i8(float addrspace(4)* %Field1, i8 addrspace(1)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(1)* @.str.4, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  store float addrspace(4)* %10, float addrspace(4)** %x, align 8, !tbaa !5
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD_FOR_INT:[0-9]+]] = [[WHOLE_STRUCT_LOAD_INST]][[STRUCT_FUNC_PARAM]]
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD_FOR_INT_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[WHOLE_STRUCT_LOAD_FOR_INT]]
+; CHECK-LLVM: %[[INT_FIELD_GEP:[[:alnum:].]+]] = getelementptr inbounds %struct{{.*}}State, ptr addrspace(4) %[[WHOLE_STRUCT_LOAD_FOR_INT_BC]], i32 0, i32 1
+; The annotation for the succeeding intrinsic isn't required to be preserved
+; during translation
+; CHECK-LLVM: store ptr addrspace(4) %{{.*}}, ptr %[[INT_VAR]]
+  %11 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+  %Field2 = getelementptr inbounds %struct._ZTS5State.State, %struct._ZTS5State.State addrspace(4)* %11, i32 0, i32 1
+  %12 = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %Field2, i8 addrspace(1)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(1)* @.str.5, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  store i32 addrspace(4)* %12, i32 addrspace(4)** %y, align 8, !tbaa !5
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD:[0-9]+]] = [[WHOLE_STRUCT_LOAD_INST]][[STRUCT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[WHOLE_STRUCT_LOAD]], ptr @[[PARAM_15_CACHE_127:]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL]], ptr %[[STRUCT_VAR]]
+  %13 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+  %14 = call %struct._ZTS5State.State addrspace(4)* @llvm.ptr.annotation.p4s_struct._ZTS5State.States(%struct._ZTS5State.State addrspace(4)* %13, i8 addrspace(1)* getelementptr inbounds ([28 x i8], [28 x i8] addrspace(1)* @.str.6, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  store %struct._ZTS5State.State addrspace(4)* %14, %struct._ZTS5State.State addrspace(4)** %z, align 8, !tbaa !5
+  %15 = bitcast double addrspace(4)** %t to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %15) #5
+; CHECK-LLVM: %[[FLOAT_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[FLOAT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[FLOAT_FUNC_PARAM_LOAD]], ptr @[[PARAM_3_CACHE_0:]]
+; CHECK-LLVM: %[[BITCAST_FLOAT_TO_DOUBLE:[[:alnum:].]+]] = bitcast ptr addrspace(4) %[[INTRINSIC_CALL]] to ptr addrspace(4)
+; CHECK-LLVM: store ptr addrspace(4) %[[BITCAST_FLOAT_TO_DOUBLE]], ptr %[[DOUBLE_VAR]]
+  %16 = load float addrspace(4)*, float addrspace(4)** %A.addr, align 8, !tbaa !5
+  %17 = bitcast float addrspace(4)* %16 to double addrspace(4)*
+  %18 = call double addrspace(4)* @llvm.ptr.annotation.p4f64.p1i8(double addrspace(4)* %17, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  store double addrspace(4)* %18, double addrspace(4)** %t, align 8, !tbaa !5
+; CHECK-LLVM: %[[FLOAT_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[FLOAT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[FLOAT_FUNC_PARAM_LOAD]], ptr @[[PARAM_3_CACHE_0:]]
+; CHECK-LLVM: %[[INTRINSIC_CALL_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[INTRINSIC_CALL]]
+; CHECK-LLVM: store float 5.000000e+00, ptr addrspace(4) %[[INTRINSIC_CALL_BC]]
+  %19 = load float addrspace(4)*, float addrspace(4)** %A.addr, align 8, !tbaa !5
+  %20 = call float addrspace(4)* @llvm.ptr.annotation.p4f32.p1i8(float addrspace(4)* %19, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  store float 5.000000e+00, float addrspace(4)* %20, align 4, !tbaa !5
+  %21 = bitcast i32* %s to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %21) #5
+; CHECK-LLVM: %[[INT1_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[INT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[INT1_FUNC_PARAM_LOAD]], ptr @[[PARAM_12_CACHE_0:]]
+; CHECK-LLVM: %[[INTRINSIC_CALL_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[INTRINSIC_CALL]]
+; CHECK-LLVM: %[[INTRINSIC_RESULT_LOAD:[[:alnum:].]+]] = load i32, ptr addrspace(4) %[[INTRINSIC_CALL_BC]]
+; CHECK-LLVM: store i32 %[[INTRINSIC_RESULT_LOAD]], ptr %[[INT_VAR_1]]
+  %22 = load i32 addrspace(4)*, i32 addrspace(4)** %B.addr, align 8, !tbaa !5
+  %23 = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %22, i8 addrspace(1)* getelementptr inbounds ([26 x i8], [26 x i8] addrspace(1)* @.str.2, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+  %24 = load i32, i32 addrspace(4)* %23, align 4, !tbaa !5
+  store i32 %24, i32* %s, align 4, !tbaa !5
+  %25 = bitcast i32* %s to i8*
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %25) #5
+  %26 = bitcast double addrspace(4)** %t to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %26) #5
+  %27 = bitcast %struct._ZTS5State.State addrspace(4)** %z to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %27) #5
+  %28 = bitcast i32 addrspace(4)** %y to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %28) #5
+  %29 = bitcast float addrspace(4)** %x to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %29) #5
+  ret void
+}
+
+; Function Attrs: nounwind willreturn
+declare float addrspace(4)* @llvm.ptr.annotation.p4f32.p1i8(float addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #4
+
+; Function Attrs: nounwind willreturn
+declare i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #4
+
+; Function Attrs: nounwind willreturn
+declare %struct._ZTS5State.State addrspace(4)* @llvm.ptr.annotation.p4s_struct._ZTS5State.States(%struct._ZTS5State.State addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #4
+
+; Function Attrs: nounwind willreturn
+declare double addrspace(4)* @llvm.ptr.annotation.p4f64.p1i8(double addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #4
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "sycl-module-id"="/tmp/lsu.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind willreturn }
+attributes #2 = { inlinehint norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind willreturn }
+attributes #5 = { nounwind }
+attributes #6 = { readnone }
+
+!llvm.module.flags = !{!0}
+!opencl.spir.version = !{!1}
+!spirv.Source = !{!2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 2}
+!2 = !{i32 4, i32 100000}
+!3 = !{!"clang version 11.0.0"}
+!4 = !{}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"any pointer", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/fpga_lsu_function_call.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/fpga_lsu_function_call.ll
new file mode 100644
index 0000000000000..254ea80f8a8d8
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/fpga_lsu_function_call.ll
@@ -0,0 +1,36 @@
+; RUN: llc -O0 -verify-machineinstrs  -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_fpga_memory_accesses %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV
+
+; CHECK-SPIRV-DAG: OpCapability FPGAMemoryAccessesINTEL
+; CHECK-SPIRV-DAG: OpExtension "SPV_INTEL_fpga_memory_accesses"
+; CHECK-SPIRV: OpDecorate %[[#DecTarget:]] BurstCoalesceINTEL
+; CHECK-SPIRV: %[[#DecTarget]] = OpFunctionCall 
+
+; ModuleID = 'test.bc'
+source_filename = "llvm-link"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%struct.MyStruct = type { i32 }
+
+$_ZN8MyStructaSERKS_ = comdat any
+
+$accessor = comdat any
+
+ at .str.1 = private unnamed_addr addrspace(1) constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata"
+ at .str.2 = private unnamed_addr addrspace(1) constant [11 x i8] c"{params:1}\00", section "llvm.metadata"
+
+define spir_func void @foo(ptr %Ptr, ptr byval(%struct.MyStruct) align 4 %Val) {
+entry:
+  %Ptr.ascast = addrspacecast ptr %Ptr to ptr addrspace(4)
+  %Val.ascast = addrspacecast ptr %Val to ptr addrspace(4)
+  %call = call spir_func noundef ptr addrspace(4) @accessor(ptr addrspace(4) %Ptr.ascast)
+  %0 = call ptr addrspace(4) @llvm.ptr.annotation.p4.p1(ptr addrspace(4) %call, ptr addrspace(1) @.str.2, ptr addrspace(1) @.str.1, i32 0, ptr addrspace(1) null)
+  %call1 = call spir_func ptr addrspace(4) @_ZN8MyStructaSERKS_(ptr addrspace(4) %0, ptr addrspace(4) %Val.ascast)
+  ret void
+}
+
+declare ptr addrspace(4) @llvm.ptr.annotation.p4.p1(ptr addrspace(4), ptr addrspace(1), ptr addrspace(1), i32, ptr addrspace(1))
+
+declare spir_func ptr addrspace(4) @_ZN8MyStructaSERKS_(ptr addrspace(4) %this, ptr addrspace(4) %op)
+
+declare spir_func ptr addrspace(4) @accessor(ptr addrspace(4) %this)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/intel_fpga_lsu_optimized.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/intel_fpga_lsu_optimized.ll
new file mode 100644
index 0000000000000..60f8173eb4bdd
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/intel_fpga_lsu_optimized.ll
@@ -0,0 +1,152 @@
+; LLVM IR generated by Intel SYCL Clang compiler (https://github.com/intel/llvm)
+; SYCL source code can be found below:
+
+; #include <CL/sycl.hpp>
+; #include <CL/sycl/intel/fpga_extensions.hpp>
+;
+; int main() {
+;   cl::sycl::queue Queue{cl::sycl::intel::fpga_emulator_selector{}};
+;
+;   {
+;     cl::sycl::buffer<int, 1> output_buffer(output_data, 1);
+;     cl::sycl::buffer<int, 1> input_buffer(input_data, 1);
+;
+;     Queue.submit([&](cl::sycl::handler &cgh) {
+;       auto output_accessor =
+;           output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
+;       auto input_accessor =
+;           input_buffer.get_access<cl::sycl::access::mode::read>(cgh);
+;
+;       cgh.single_task<class kernel>([=] {
+;         auto input_ptr = input_accessor.get_pointer();
+;         auto output_ptr = output_accessor.get_pointer();
+;
+;         using PrefetchingLSU =
+;             cl::sycl::intel::lsu<cl::sycl::intel::prefetch<true>,
+;                                  cl::sycl::intel::statically_coalesce<false>>;
+;
+;         using BurstCoalescedLSU =
+;             cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
+;                                  cl::sycl::intel::statically_coalesce<false>>;
+;
+;         using CachingLSU =
+;             cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
+;                                  cl::sycl::intel::cache<1024>,
+;                                  cl::sycl::intel::statically_coalesce<false>>;
+;
+;         using PipelinedLSU = cl::sycl::intel::lsu<>;
+;
+;         int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
+;         int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
+;
+;         BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
+;         PipelinedLSU::store(output_ptr + 1, Y);  // output_ptr[1] = Y
+;       });
+;     });
+;   }
+;
+;   return 0;
+; }
+
+; Check that translation of optimized IR doesn't crash:
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_fpga_memory_accesses -o %t.spv
+
+; Check that reverse translation restore ptr.annotations correctly:
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
+%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
+
+$"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel" = comdat any
+
+ at .str = private unnamed_addr addrspace(1) constant [26 x i8] c"{params:12}{cache-size:0}\00", section "llvm.metadata"
+ at .str.1 = private unnamed_addr addrspace(1) constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata"
+ at .str.2 = private unnamed_addr addrspace(1) constant [28 x i8] c"{params:7}{cache-size:1024}\00", section "llvm.metadata"
+ at .str.3 = private unnamed_addr addrspace(1) constant [25 x i8] c"{params:5}{cache-size:0}\00", section "llvm.metadata"
+ at .str.4 = private unnamed_addr addrspace(1) constant [25 x i8] c"{params:0}{cache-size:0}\00", section "llvm.metadata"
+
+; CHECK-LLVM: [[PTR_i27_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:12}
+; CHECK-LLVM: [[PTR_i15_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:7}{cache-size:1024}
+; CHECK-LLVM: [[PTR_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:5}
+; CHECK-LLVM: [[PTR_i_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:0}{cache-size:0}
+
+; Function Attrs: norecurse
+define weak_odr dso_local spir_kernel void @"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel"(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_buffer_location !8 {
+entry:
+  %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0
+  %1 = load i64, i64* %0, align 8
+  %add.ptr.i27 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %1
+  %2 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0
+  %3 = load i64, i64* %2, align 8
+  %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %3
+  %4 = addrspacecast i32 addrspace(1)* %add.ptr.i27 to i32 addrspace(4)*
+  %5 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %4, i8 addrspace(1)* getelementptr inbounds ([26 x i8], [26 x i8] addrspace(1)* @.str, i64 0, i64 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i64 0, i64 0), i32 0, i8 addrspace(1)* null) #2
+  %6 = load i32, i32 addrspace(4)* %5, align 4, !tbaa !9
+  ; CHECK-LLVM: [[PTR_i27:[%0-9a-z.]+]] = getelementptr inbounds i32, ptr addrspace(1) {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
+  ; CHECK-LLVM: [[PTR_i:[%0-9a-z.]+]] = getelementptr inbounds i32, ptr addrspace(1) {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
+  ; CHECK-LLVM: [[PTR_i27_AS_CAST:[%0-9a-z.]+]] = addrspacecast ptr addrspace(1) [[PTR_i27]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) [[PTR_i27_AS_CAST]], ptr [[PTR_i27_ANNOT_STR]]
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC2:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL_BC]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_RESULT_LOAD:[%0-9a-z.]+]] = load i32, ptr addrspace(4) [[PTR_ANNOT_CALL_BC2]]
+  %add.ptr.i15.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i27, i64 1
+  %7 = addrspacecast i32 addrspace(1)* %add.ptr.i15.i to i32 addrspace(4)*
+  %8 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %7, i8 addrspace(1)* getelementptr inbounds ([28 x i8], [28 x i8] addrspace(1)* @.str.2, i64 0, i64 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i64 0, i64 0), i32 0, i8 addrspace(1)* null) #2
+  %9 = load i32, i32 addrspace(4)* %8, align 4, !tbaa !9
+  ; CHECK-LLVM: [[PTR_i15_i:[%0-9a-z.]+]] = getelementptr inbounds i32, ptr addrspace(1) {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
+  ; CHECK-LLVM: [[PTR_i15_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast ptr addrspace(1) [[PTR_i15_i]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) [[PTR_i15_i_AS_CAST]], ptr [[PTR_i15_i_ANNOT_STR]]
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC2:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL_BC]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_RESULT_LOAD_1:[%0-9a-z.]+]] = load i32, ptr addrspace(4) [[PTR_ANNOT_CALL_BC2]]
+  %10 = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)*
+  %11 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %10, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str.3, i64 0, i64 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i64 0, i64 0), i32 0, i8 addrspace(1)* null) #2
+  store i32 %6, i32 addrspace(4)* %11, align 4, !tbaa !9
+  ; CHECK-LLVM: [[PTR_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast ptr addrspace(1) [[PTR_i]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) [[PTR_i_AS_CAST]], ptr [[PTR_i_ANNOT_STR]]
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC2:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL_BC]] to ptr addrspace(4)
+  ; CHECK-LLVM: store i32 [[PTR_RESULT_LOAD]], ptr addrspace(4) [[PTR_ANNOT_CALL_BC2]]
+  %add.ptr.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 1
+  %12 = addrspacecast i32 addrspace(1)* %add.ptr.i.i to i32 addrspace(4)*
+  %13 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %12, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str.4, i64 0, i64 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i64 0, i64 0), i32 0, i8 addrspace(1)* null) #2
+  store i32 %9, i32 addrspace(4)* %13, align 4, !tbaa !9
+  ; CHECK-LLVM: [[PTR_i_i:[%0-9a-z.]+]] = getelementptr inbounds i32, ptr addrspace(1) {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
+  ; CHECK-LLVM: [[PTR_i_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast ptr addrspace(1) [[PTR_i_i]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) [[PTR_i_i_AS_CAST]], ptr [[PTR_i_i_ANNOT_STR]]
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL]] to ptr addrspace(4)
+  ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC2:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL_BC]] to ptr addrspace(4)
+  ; CHECK-LLVM: store i32 [[PTR_RESULT_LOAD_1]], ptr addrspace(4) [[PTR_ANNOT_CALL_BC2]]
+  ret void
+}
+
+; Function Attrs: nounwind willreturn
+declare i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #1
+
+attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="fpga_lsu.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind willreturn }
+attributes #2 = { nounwind readnone }
+
+!opencl.spir.version = !{!0}
+!spirv.Source = !{!1}
+!llvm.ident = !{!2}
+!llvm.module.flags = !{!3}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
+!2 = !{!"clang version 12.0.0"}
+!3 = !{i32 1, !"wchar_size", i32 4}
+!4 = !{i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0}
+!5 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
+!6 = !{!"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>", !"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>"}
+!7 = !{!"", !"", !"", !"", !"", !"", !"", !""}
+!8 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !11, i64 0}
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C++ TBAA"}



More information about the llvm-commits mailing list