[llvm] FEAT : Added supoort for the extenion SPV_INTEL_fpga_memory_acesses (PR #133210)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 22:41:51 PDT 2025
https://github.com/sumesh-s-mcw created https://github.com/llvm/llvm-project/pull/133210
None
>From 3a9338d7fc1691457ed146f456ac8380a08e05ee Mon Sep 17 00:00:00 2001
From: sumesh-s-mcw <sumesh.suresh at multicorewareinc.com>
Date: Fri, 21 Mar 2025 11:01:00 +0530
Subject: [PATCH] FEAT : Added supoort for the extenion
SPV_INTEL_fpga_memory_acesses
---
llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 2 +
llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 5 +
.../Target/SPIRV/SPIRVPrepareFunctions.cpp | 143 ++++++++-
.../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 6 +-
.../IntelFPGAMemoryAccesses.ll | 273 ++++++++++++++++++
.../fpga_lsu_function_call.ll | 36 +++
.../intel_fpga_lsu_optimized.ll | 152 ++++++++++
7 files changed, 604 insertions(+), 13 deletions(-)
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/IntelFPGAMemoryAccesses.ll
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/fpga_lsu_function_call.ll
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/intel_fpga_lsu_optimized.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 37119bf01545c..b6da420910673 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -89,6 +89,8 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
SPIRV::Extension::Extension::SPV_KHR_cooperative_matrix},
{"SPV_KHR_non_semantic_info",
SPIRV::Extension::Extension::SPV_KHR_non_semantic_info},
+ {"SPV_INTEL_fpga_memory_accesses",
+ SPIRV::Extension::Extension::SPV_INTEL_fpga_memory_accesses},
{"SPV_INTEL_long_composites",
SPIRV::Extension::Extension::SPV_INTEL_long_composites},
{"SPV_INTEL_fp_max_error",
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index acc8c014cb26b..14a8aee845d38 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -893,6 +893,11 @@ static void addOpDecorateReqs(const MachineInstr &MI, unsigned DecIndex,
} else if (Dec == SPIRV::Decoration::FPMaxErrorDecorationINTEL) {
Reqs.addRequirements(SPIRV::Capability::FPMaxErrorINTEL);
Reqs.addExtension(SPIRV::Extension::SPV_INTEL_fp_max_error);
+ } else if (Dec == SPIRV::Decoration::BurstCoalesceINTEL ||
+ Dec == SPIRV::Decoration::CacheSizeINTEL ||
+ Dec == SPIRV::Decoration::DontStaticallyCoalesceINTEL ||
+ Dec == SPIRV::Decoration::PrefetchINTEL) {
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_fpga_memory_accesses);
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 028699e56a946..1fac5b03630d0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -91,11 +91,62 @@ static Function *getOrCreateFunction(Module *M, Type *RetTy,
return NewF;
}
+enum IntelFPGAMemoryAccessesVal {
+ BurstCoalesce = 0x1,
+ CacheSizeFlag = 0x2,
+ DontStaticallyCoalesce = 0x4,
+ PrefetchFlag = 0x8
+};
+
+using DecorationsInfoVec =
+ std::vector<std::pair<unsigned, std::vector<std::string>>>;
+
+struct IntelLSUControlsInfo {
+ void setWithBitMask(unsigned ParamsBitMask) {
+ if (ParamsBitMask & IntelFPGAMemoryAccessesVal::BurstCoalesce)
+ BurstCoalesce = true;
+ if (ParamsBitMask & IntelFPGAMemoryAccessesVal::CacheSizeFlag)
+ CacheSizeInfo = 0;
+ if (ParamsBitMask & IntelFPGAMemoryAccessesVal::DontStaticallyCoalesce)
+ DontStaticallyCoalesce = true;
+ if (ParamsBitMask & IntelFPGAMemoryAccessesVal::PrefetchFlag)
+ PrefetchInfo = 0;
+ }
+
+ DecorationsInfoVec getDecorationsFromCurrentState() {
+ DecorationsInfoVec ResultVec;
+ if (BurstCoalesce)
+ ResultVec.emplace_back(SPIRV::Decoration::Decoration::BurstCoalesceINTEL,
+ std::vector<std::string>());
+ if (DontStaticallyCoalesce)
+ ResultVec.emplace_back(
+ SPIRV::Decoration::Decoration::DontStaticallyCoalesceINTEL,
+ std::vector<std::string>());
+
+ if (CacheSizeInfo.has_value()) {
+ ResultVec.emplace_back(
+ SPIRV::Decoration::Decoration::CacheSizeINTEL,
+ std::vector<std::string>{std::to_string(CacheSizeInfo.value())});
+ }
+ if (PrefetchInfo.has_value()) {
+ ResultVec.emplace_back(
+ SPIRV::Decoration::Decoration::PrefetchINTEL,
+ std::vector<std::string>{std::to_string(PrefetchInfo.value())});
+ }
+ return ResultVec;
+ }
+
+ bool BurstCoalesce = false;
+ std::optional<unsigned> CacheSizeInfo;
+ bool DontStaticallyCoalesce = false;
+ std::optional<unsigned> PrefetchInfo;
+};
+
static bool lowerIntrinsicToFunction(IntrinsicInst *Intrinsic) {
- // For @llvm.memset.* intrinsic cases with constant value and length arguments
- // are emulated via "storing" a constant array to the destination. For other
- // cases we wrap the intrinsic in @spirv.llvm_memset_* function and expand the
- // intrinsic to a loop via expandMemSetAsLoop().
+ // For @llvm.memset.* intrinsic cases with constant value and length
+ // arguments are emulated via "storing" a constant array to the destination.
+ // For other cases we wrap the intrinsic in @spirv.llvm_memset_* function
+ // and expand the intrinsic to a loop via expandMemSetAsLoop().
if (auto *MSI = dyn_cast<MemSetInst>(Intrinsic))
if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
return false; // It is handled later using OpCopyMemorySized.
@@ -243,7 +294,7 @@ static SmallVector<Metadata *> parseAnnotation(Value *I,
: SmallVector<Metadata *>{};
}
-static void lowerPtrAnnotation(IntrinsicInst *II) {
+static void lowerPtrAnnotation(IntrinsicInst *II, const SPIRVSubtarget &STI) {
LLVMContext &Ctx = II->getContext();
Type *Int32Ty = Type::getInt32Ty(Ctx);
@@ -256,10 +307,77 @@ static void lowerPtrAnnotation(IntrinsicInst *II) {
std::string Anno =
getAnnotation(II->getArgOperand(1),
4 < II->arg_size() ? II->getArgOperand(4) : nullptr);
+ // messed code will correct it once it is working
+ // PARSE THE ANOTATION
+ std::regex DecorationRegex("\\{\\w([\\w:,-]|\"[^\"]*\")*\\}");
+ using RegexIterT = std::sregex_iterator;
+ RegexIterT DecorationsIt(Anno.cbegin(), Anno.cend(), DecorationRegex);
+ RegexIterT DecorationsEnd;
+ IntelLSUControlsInfo LSUControls;
+ for (; DecorationsIt != DecorationsEnd; ++DecorationsIt) {
+ std::smatch Match = *DecorationsIt;
+ std::string DecorationStr = Match.str();
+ std::string AnnotatedDecoration =
+ DecorationStr.substr(1, DecorationStr.length() - 2);
+ llvm::StringRef AnnotatedRef(AnnotatedDecoration);
+ std::pair<llvm::StringRef, llvm::StringRef> Split = AnnotatedRef.split(':');
+ llvm::StringRef Name = Split.first, ValueStr = Split.second;
+
+ bool canUseFPGA = STI.canUseExtension(
+ SPIRV::Extension::Extension::SPV_INTEL_fpga_memory_accesses);
+ if (canUseFPGA) {
+ if (Name == "params") {
+ unsigned ParamsBitMask = 0;
+ bool Failure = ValueStr.getAsInteger(10, ParamsBitMask);
+ assert(!Failure && "Non-integer LSU controls value");
+ (void)Failure;
+ LSUControls.setWithBitMask(ParamsBitMask);
+ } else if (Name == "cache-size") {
+ if (!LSUControls.CacheSizeInfo.has_value())
+ continue;
+ unsigned CacheSizeValue = 0;
+ bool Failure = ValueStr.getAsInteger(10, CacheSizeValue);
+ assert(!Failure && "Non-integer cache size value");
+ (void)Failure;
+ LSUControls.CacheSizeInfo = CacheSizeValue;
+ }
+ }
+ }
+
+ DecorationsInfoVec currentDecorations =
+ LSUControls.getDecorationsFromCurrentState();
- // Parse the annotation.
SmallVector<Metadata *> MDs = parseAnnotation(II, Anno, Ctx, Int32Ty);
+ for (const auto &Dec : currentDecorations) {
+ unsigned DecKind = Dec.first;
+ const std::vector<std::string> &DecValues = Dec.second;
+ SmallVector<Metadata *> metaDataItem;
+ auto Decoration = ConstantAsMetadata::get(
+ ConstantInt::get(Int32Ty, static_cast<uint32_t>(DecKind)));
+ metaDataItem.push_back(Decoration);
+ if (!DecValues.empty()) {
+ for (const auto &val : DecValues) {
+ int32_t numValue;
+ if (llvm::to_integer(val, numValue, 10)) {
+ metaDataItem.push_back(
+ ConstantAsMetadata::get(ConstantInt::get(Int32Ty, numValue)));
+ } else {
+ metaDataItem.push_back(MDString::get(Ctx, val));
+ }
+ }
+ }
+ MDs.push_back(MDNode::get(Ctx, metaDataItem));
+
+ }
+
+ // MDs.push_back(ConstantAsMetadata::get(
+ // ConstantInt::get(Type::getInt32Ty(Ctx), DecKind)));
+ // for (const std::string &Value : DecValues) {
+ // MDs.push_back(MDString::get(Ctx, Value));
+ // }
+ //}
+
// If the annotation string is not parsed successfully we don't know the
// format used and output it as a general UserSemantic decoration.
// Otherwise MDs is a Metadata tuple (a decoration list) in the format
@@ -281,9 +399,9 @@ static void lowerPtrAnnotation(IntrinsicInst *II) {
static void lowerFunnelShifts(IntrinsicInst *FSHIntrinsic) {
// Get a separate function - otherwise, we'd have to rework the CFG of the
- // current one. Then simply replace the intrinsic uses with a call to the new
- // function.
- // Generate LLVM IR for i* @spirv.llvm_fsh?_i* (i* %a, i* %b, i* %c)
+ // current one. Then simply replace the intrinsic uses with a call to the
+ // new function. Generate LLVM IR for i* @spirv.llvm_fsh?_i* (i* %a, i* %b,
+ // i* %c)
Module *M = FSHIntrinsic->getModule();
FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType();
Type *FSHRetTy = FSHFuncTy->getReturnType();
@@ -330,8 +448,8 @@ static void lowerFunnelShifts(IntrinsicInst *FSHIntrinsic) {
// the LSBs.
SecShift = IRB.CreateShl(FSHFunc->getArg(0), SubRotateVal);
} else {
- // ...and right-shift the less significant int by this number, zero-filling
- // the MSBs.
+ // ...and right-shift the less significant int by this number,
+ // zero-filling the MSBs.
SecShift = IRB.CreateLShr(FSHFunc->getArg(1), SubRotateVal);
}
// A simple binary addition of the shifted ints yields the final result.
@@ -420,7 +538,8 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
II, Intrinsic::SPVIntrinsics::spv_lifetime_end, {1});
break;
case Intrinsic::ptr_annotation:
- lowerPtrAnnotation(II);
+ const SPIRVSubtarget &STI = TM.getSubtarget<SPIRVSubtarget>(*F);
+ lowerPtrAnnotation(II, STI);
Changed = true;
break;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index caee778eddbc4..5bbe6b9e86601 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -513,6 +513,7 @@ defm LongCompositesINTEL : CapabilityOperand<6089, 0, 0, [SPV_INTEL_long_composi
defm BindlessImagesINTEL : CapabilityOperand<6528, 0, 0, [SPV_INTEL_bindless_images], []>;
defm MemoryAccessAliasingINTEL : CapabilityOperand<5910, 0, 0, [SPV_INTEL_memory_access_aliasing], []>;
defm FPMaxErrorINTEL : CapabilityOperand<6169, 0, 0, [SPV_INTEL_fp_max_error], []>;
+defm FPGAMemoryAccessesINTEL : CapabilityOperand<5898, 0, 0, [SPV_INTEL_fpga_memory_accesses], []>;
//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
@@ -1264,7 +1265,10 @@ defm FunctionFloatingPointModeINTEL : DecorationOperand<6080, 0, 0, [], [Functio
defm AliasScopeINTEL : DecorationOperand<5914, 0, 0, [], [MemoryAccessAliasingINTEL]>;
defm NoAliasINTEL : DecorationOperand<5915, 0, 0, [], [MemoryAccessAliasingINTEL]>;
defm FPMaxErrorDecorationINTEL : DecorationOperand<6170, 0, 0, [], [FPMaxErrorINTEL]>;
-
+defm BurstCoalesceINTEL : DecorationOperand<5899, 0, 0, [], [FPGAMemoryAccessesINTEL]>;
+defm CacheSizeINTEL : DecorationOperand<5900, 0, 0, [], [FPGAMemoryAccessesINTEL]>;
+defm DontStaticallyCoalesceINTEL : DecorationOperand<5901, 0, 0, [], [FPGAMemoryAccessesINTEL]>;
+defm PrefetchINTEL : DecorationOperand<5902, 0, 0, [], [FPGAMemoryAccessesINTEL]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define BuiltIn enum values and at the same time
// SymbolicOperand entries with string mnemonics, versioning, extensions and
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/IntelFPGAMemoryAccesses.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/IntelFPGAMemoryAccesses.ll
new file mode 100644
index 0000000000000..0ad6e84db9926
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/IntelFPGAMemoryAccesses.ll
@@ -0,0 +1,273 @@
+; LLVM IR generated by Intel SYCL Clang compiler (https://github.com/intel/llvm)
+; SYCL source code can be found below:
+
+; #define BURST_COAL 0x1
+; #define CACHE_SIZE_FLAG 0x2
+; #define DONT_STATICALLY_COAL 0x4
+; #define PREFETCH 0x8
+; struct State {
+; float Field1;
+; int Field2;
+; };
+;
+; void foo(float *A, int *B, State *C) {
+; float *x;
+; int *y;
+; State *z;
+; double *t;
+; x = __builtin_intel_fpga_mem(A, BURST_COAL | CACHE_SIZE_FLAG, 0);
+; y = __builtin_intel_fpga_mem(B, DONT_STATICALLY_COAL | PREFETCH, 0);
+; z = __builtin_intel_fpga_mem(C, CACHE_SIZE_FLAG, 127);
+; x = __builtin_intel_fpga_mem(&C->Field1, BURST_COAL | CACHE_SIZE_FLAG, 127);
+; y = __builtin_intel_fpga_mem(&C->Field2, 0, 127);
+; z = __builtin_intel_fpga_mem(C, BURST_COAL | CACHE_SIZE_FLAG | DONT_STATICALLY_COAL | PREFETCH, 127);
+; t = __builtin_intel_fpga_mem((double *) A, BURST_COAL | CACHE_SIZE_FLAG, 0);
+; *__builtin_intel_fpga_mem(A, BURST_COAL | CACHE_SIZE_FLAG, 0) = 5;
+; int s = *__builtin_intel_fpga_mem(B, DONT_STATICALLY_COAL | PREFETCH, 0);
+; }
+;
+; template <typename name, typename Func>
+; __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) {
+; kernelFunc();
+; }
+;
+; int main() {
+; kernel_single_task<class fake_kernel>([]() {
+; float *A;
+; int *B;
+; State *C;
+; foo(A, B, C);
+; });
+; return 0;
+; }
+
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_fpga_memory_accesses %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV
+
+; CHECK-SPIRV: OpCapability FPGAMemoryAccessesINTEL
+; CHECK-SPIRV: Extension "SPV_INTEL_fpga_memory_accesses"
+; Check that the semantically meaningless decoration was
+; translated as a mere annotation
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} UserSemantic "{params:0}{cache-size:127}"
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} BurstCoalesceINTEL
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} CacheSizeINTEL 0
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} CacheSizeINTEL 127
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} DontStaticallyCoalesceINTEL
+; CHECK-SPIRV-DAG: OpDecorate %{{[0-9]+}} PrefetchINTEL 0
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%"class._ZTSZ4mainE3$_0.anon" = type { i8 }
+%struct._ZTS5State.State = type { float, i32 }
+
+; CHECK-LLVM: [[PARAM_3_CACHE_0:@[a-z0-9_.]+]] = {{.*}}{params:3}{cache-size:0}
+ at .str = private unnamed_addr addrspace(1) constant [25 x i8] c"{params:3}{cache-size:0}\00", section "llvm.metadata"
+ at .str.1 = private unnamed_addr addrspace(1) constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata"
+; "params" bitmask doesn't hold 0x2 (cache size ON), so cache-size can be dropped
+; during translation
+; CHECK-LLVM: [[PARAM_12_CACHE_0:@[a-z0-9_.]+]] = {{.*}}{params:12}
+ at .str.2 = private unnamed_addr addrspace(1) constant [26 x i8] c"{params:12}{cache-size:0}\00", section "llvm.metadata"
+; CHECK-LLVM: [[PARAM_2_CACHE_127:@[a-z0-9_.]+]] = {{.*}}{params:2}{cache-size:127}
+ at .str.3 = private unnamed_addr addrspace(1) constant [27 x i8] c"{params:2}{cache-size:127}\00", section "llvm.metadata"
+; CHECK-LLVM: [[PARAM_3_CACHE_127:@[a-z0-9_.]+]] = {{.*}}{params:3}{cache-size:127}
+ at .str.4 = private unnamed_addr addrspace(1) constant [27 x i8] c"{params:3}{cache-size:127}\00", section "llvm.metadata"
+; Since "params" bitmask is set to 0, the next string isn't required to be preserved
+; during translation. Neither is the corresponding pointer annotation intrinsic.
+ at .str.5 = private unnamed_addr addrspace(1) constant [27 x i8] c"{params:0}{cache-size:127}\00", section "llvm.metadata"
+; CHECK-LLVM: [[PARAM_15_CACHE_127:@[a-z0-9_.]+]] = {{.*}}{params:15}{cache-size:127}
+ at .str.6 = private unnamed_addr addrspace(1) constant [28 x i8] c"{params:15}{cache-size:127}\00", section "llvm.metadata"
+
+; Function Attrs: norecurse nounwind
+define spir_kernel void @_ZTSZ4mainE11fake_kernel() #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 {
+entry:
+ %0 = alloca %"class._ZTSZ4mainE3$_0.anon", align 1
+ %1 = bitcast %"class._ZTSZ4mainE3$_0.anon"* %0 to i8*
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %1) #5
+ %2 = addrspacecast %"class._ZTSZ4mainE3$_0.anon"* %0 to %"class._ZTSZ4mainE3$_0.anon" addrspace(4)*
+ call spir_func void @"_ZZ4mainENK3$_0clEv"(%"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %2)
+ %3 = bitcast %"class._ZTSZ4mainE3$_0.anon"* %0 to i8*
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %3) #5
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* captures(none)) #1
+
+; Function Attrs: inlinehint norecurse nounwind
+define internal spir_func void @"_ZZ4mainENK3$_0clEv"(%"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this) #2 align 2 {
+entry:
+ %this.addr = alloca %"class._ZTSZ4mainE3$_0.anon" addrspace(4)*, align 8
+ %A = alloca float addrspace(4)*, align 8
+ %B = alloca i32 addrspace(4)*, align 8
+ %C = alloca %struct._ZTS5State.State addrspace(4)*, align 8
+ store %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this, %"class._ZTSZ4mainE3$_0.anon" addrspace(4)** %this.addr, align 8, !tbaa !5
+ %0 = bitcast float addrspace(4)** %A to i8*
+ call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) #5
+ %1 = bitcast i32 addrspace(4)** %B to i8*
+ call void @llvm.lifetime.start.p0i8(i64 8, i8* %1) #5
+ %2 = bitcast %struct._ZTS5State.State addrspace(4)** %C to i8*
+ call void @llvm.lifetime.start.p0i8(i64 8, i8* %2) #5
+ %3 = load float addrspace(4)*, float addrspace(4)** %A, align 8, !tbaa !5
+ %4 = load i32 addrspace(4)*, i32 addrspace(4)** %B, align 8, !tbaa !5
+ %5 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C, align 8, !tbaa !5
+ call spir_func void @_Z3fooPfPiP5State(float addrspace(4)* %3, i32 addrspace(4)* %4, %struct._ZTS5State.State addrspace(4)* %5)
+ %6 = bitcast %struct._ZTS5State.State addrspace(4)** %C to i8*
+ call void @llvm.lifetime.end.p0i8(i64 8, i8* %6) #5
+ %7 = bitcast i32 addrspace(4)** %B to i8*
+ call void @llvm.lifetime.end.p0i8(i64 8, i8* %7) #5
+ %8 = bitcast float addrspace(4)** %A to i8*
+ call void @llvm.lifetime.end.p0i8(i64 8, i8* %8) #5
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* captures(none)) #1
+
+; CHECK-LLVM: define spir_func void @{{.*}}foo
+; Function Attrs: norecurse nounwind
+define spir_func void @_Z3fooPfPiP5State(float addrspace(4)* %A, i32 addrspace(4)* %B, %struct._ZTS5State.State addrspace(4)* %C) #3 {
+entry:
+; CHECK-LLVM: %[[FLOAT_FUNC_PARAM:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[INT_FUNC_PARAM:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[STRUCT_FUNC_PARAM:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+ %A.addr = alloca float addrspace(4)*, align 8
+ %B.addr = alloca i32 addrspace(4)*, align 8
+ %C.addr = alloca %struct._ZTS5State.State addrspace(4)*, align 8
+; CHECK-LLVM: %[[FLOAT_VAR:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[INT_VAR:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[STRUCT_VAR:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[DOUBLE_VAR:[[:alnum:].]+]] = alloca ptr addrspace(4), align 8
+; CHECK-LLVM: %[[INT_VAR_1:[[:alnum:].]+]] = alloca i32, align 4
+ %x = alloca float addrspace(4)*, align 8
+ %y = alloca i32 addrspace(4)*, align 8
+ %z = alloca %struct._ZTS5State.State addrspace(4)*, align 8
+ %t = alloca double addrspace(4)*, align 8
+ %s = alloca i32, align 4
+ store float addrspace(4)* %A, float addrspace(4)** %A.addr, align 8, !tbaa !5
+ store i32 addrspace(4)* %B, i32 addrspace(4)** %B.addr, align 8, !tbaa !5
+ store %struct._ZTS5State.State addrspace(4)* %C, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+ %0 = bitcast float addrspace(4)** %x to i8*
+ call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) #5
+ %1 = bitcast i32 addrspace(4)** %y to i8*
+ call void @llvm.lifetime.start.p0i8(i64 8, i8* %1) #5
+ %2 = bitcast %struct._ZTS5State.State addrspace(4)** %z to i8*
+ call void @llvm.lifetime.start.p0i8(i64 8, i8* %2) #5
+; CHECK-LLVM: %[[FLOAT_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[FLOAT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[FLOAT_FUNC_PARAM_LOAD]], ptr @[[PARAM_3_CACHE_0_TODO:]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL]], ptr %[[FLOAT_VAR]]
+ %3 = load float addrspace(4)*, float addrspace(4)** %A.addr, align 8, !tbaa !5
+ %4 = call float addrspace(4)* @llvm.ptr.annotation.p4f32.p1i8(float addrspace(4)* %3, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ store float addrspace(4)* %4, float addrspace(4)** %x, align 8, !tbaa !5
+; CHECK-LLVM: %[[INT_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[INT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[INT_FUNC_PARAM_LOAD]], ptr @[[PARAM_12_CACHE_0:]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL]], ptr %[[INT_VAR]]
+ %5 = load i32 addrspace(4)*, i32 addrspace(4)** %B.addr, align 8, !tbaa !5
+ %6 = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %5, i8 addrspace(1)* getelementptr inbounds ([26 x i8], [26 x i8] addrspace(1)* @.str.2, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ store i32 addrspace(4)* %6, i32 addrspace(4)** %y, align 8, !tbaa !5
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD:[0-9]+]] = [[WHOLE_STRUCT_LOAD_INST:load\ ptr\ addrspace\(4\),\ ptr.*]][[STRUCT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[WHOLE_STRUCT_LOAD]], ptr @[[PARAM_2_CACHE_127:]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL]], ptr %[[STRUCT_VAR]]
+ %7 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+ %8 = call %struct._ZTS5State.State addrspace(4)* @llvm.ptr.annotation.p4s_struct._ZTS5State.States(%struct._ZTS5State.State addrspace(4)* %7, i8 addrspace(1)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(1)* @.str.3, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ store %struct._ZTS5State.State addrspace(4)* %8, %struct._ZTS5State.State addrspace(4)** %z, align 8, !tbaa !5
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD_FOR_FLOAT:[0-9]+]] = [[WHOLE_STRUCT_LOAD_INST]][[STRUCT_FUNC_PARAM]]
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD_FOR_FLOAT_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[WHOLE_STRUCT_LOAD_FOR_FLOAT]]
+; CHECK-LLVM: %[[FLOAT_FIELD_GEP:[[:alnum:].]+]] = getelementptr inbounds %struct{{.*}}State, ptr addrspace(4) %[[WHOLE_STRUCT_LOAD_FOR_FLOAT_BC]], i32 0, i32 0
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[FLOAT_FIELD_GEP]], ptr @[[PARAM_3_CACHE_127:]]
+; CHECK-LLVM: %[[INTRINSIC_CALL_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[INTRINSIC_CALL]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL_BC]], ptr %[[FLOAT_VAR]]
+ %9 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+ %Field1 = getelementptr inbounds %struct._ZTS5State.State, %struct._ZTS5State.State addrspace(4)* %9, i32 0, i32 0
+ %10 = call float addrspace(4)* @llvm.ptr.annotation.p4f32.p1i8(float addrspace(4)* %Field1, i8 addrspace(1)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(1)* @.str.4, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ store float addrspace(4)* %10, float addrspace(4)** %x, align 8, !tbaa !5
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD_FOR_INT:[0-9]+]] = [[WHOLE_STRUCT_LOAD_INST]][[STRUCT_FUNC_PARAM]]
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD_FOR_INT_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[WHOLE_STRUCT_LOAD_FOR_INT]]
+; CHECK-LLVM: %[[INT_FIELD_GEP:[[:alnum:].]+]] = getelementptr inbounds %struct{{.*}}State, ptr addrspace(4) %[[WHOLE_STRUCT_LOAD_FOR_INT_BC]], i32 0, i32 1
+; The annotation for the succeeding intrinsic isn't required to be preserved
+; during translation
+; CHECK-LLVM: store ptr addrspace(4) %{{.*}}, ptr %[[INT_VAR]]
+ %11 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+ %Field2 = getelementptr inbounds %struct._ZTS5State.State, %struct._ZTS5State.State addrspace(4)* %11, i32 0, i32 1
+ %12 = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %Field2, i8 addrspace(1)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(1)* @.str.5, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ store i32 addrspace(4)* %12, i32 addrspace(4)** %y, align 8, !tbaa !5
+; CHECK-LLVM: %[[WHOLE_STRUCT_LOAD:[0-9]+]] = [[WHOLE_STRUCT_LOAD_INST]][[STRUCT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[WHOLE_STRUCT_LOAD]], ptr @[[PARAM_15_CACHE_127:]]
+; CHECK-LLVM: store ptr addrspace(4) %[[INTRINSIC_CALL]], ptr %[[STRUCT_VAR]]
+ %13 = load %struct._ZTS5State.State addrspace(4)*, %struct._ZTS5State.State addrspace(4)** %C.addr, align 8, !tbaa !5
+ %14 = call %struct._ZTS5State.State addrspace(4)* @llvm.ptr.annotation.p4s_struct._ZTS5State.States(%struct._ZTS5State.State addrspace(4)* %13, i8 addrspace(1)* getelementptr inbounds ([28 x i8], [28 x i8] addrspace(1)* @.str.6, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ store %struct._ZTS5State.State addrspace(4)* %14, %struct._ZTS5State.State addrspace(4)** %z, align 8, !tbaa !5
+ %15 = bitcast double addrspace(4)** %t to i8*
+ call void @llvm.lifetime.start.p0i8(i64 8, i8* %15) #5
+; CHECK-LLVM: %[[FLOAT_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[FLOAT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[FLOAT_FUNC_PARAM_LOAD]], ptr @[[PARAM_3_CACHE_0:]]
+; CHECK-LLVM: %[[BITCAST_FLOAT_TO_DOUBLE:[[:alnum:].]+]] = bitcast ptr addrspace(4) %[[INTRINSIC_CALL]] to ptr addrspace(4)
+; CHECK-LLVM: store ptr addrspace(4) %[[BITCAST_FLOAT_TO_DOUBLE]], ptr %[[DOUBLE_VAR]]
+ %16 = load float addrspace(4)*, float addrspace(4)** %A.addr, align 8, !tbaa !5
+ %17 = bitcast float addrspace(4)* %16 to double addrspace(4)*
+ %18 = call double addrspace(4)* @llvm.ptr.annotation.p4f64.p1i8(double addrspace(4)* %17, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ store double addrspace(4)* %18, double addrspace(4)** %t, align 8, !tbaa !5
+; CHECK-LLVM: %[[FLOAT_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[FLOAT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[FLOAT_FUNC_PARAM_LOAD]], ptr @[[PARAM_3_CACHE_0:]]
+; CHECK-LLVM: %[[INTRINSIC_CALL_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[INTRINSIC_CALL]]
+; CHECK-LLVM: store float 5.000000e+00, ptr addrspace(4) %[[INTRINSIC_CALL_BC]]
+ %19 = load float addrspace(4)*, float addrspace(4)** %A.addr, align 8, !tbaa !5
+ %20 = call float addrspace(4)* @llvm.ptr.annotation.p4f32.p1i8(float addrspace(4)* %19, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ store float 5.000000e+00, float addrspace(4)* %20, align 4, !tbaa !5
+ %21 = bitcast i32* %s to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* %21) #5
+; CHECK-LLVM: %[[INT1_FUNC_PARAM_LOAD:[[:alnum:].]+]] = load ptr addrspace(4), ptr %[[INT_FUNC_PARAM]]
+; CHECK-LLVM: %[[INTRINSIC_CALL:[[:alnum:].]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) %[[INT1_FUNC_PARAM_LOAD]], ptr @[[PARAM_12_CACHE_0:]]
+; CHECK-LLVM: %[[INTRINSIC_CALL_BC:[0-9]+]] = bitcast ptr addrspace(4) %[[INTRINSIC_CALL]]
+; CHECK-LLVM: %[[INTRINSIC_RESULT_LOAD:[[:alnum:].]+]] = load i32, ptr addrspace(4) %[[INTRINSIC_CALL_BC]]
+; CHECK-LLVM: store i32 %[[INTRINSIC_RESULT_LOAD]], ptr %[[INT_VAR_1]]
+ %22 = load i32 addrspace(4)*, i32 addrspace(4)** %B.addr, align 8, !tbaa !5
+ %23 = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %22, i8 addrspace(1)* getelementptr inbounds ([26 x i8], [26 x i8] addrspace(1)* @.str.2, i32 0, i32 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i32 0, i32 0), i32 0, i8 addrspace(1)* null) #6
+ %24 = load i32, i32 addrspace(4)* %23, align 4, !tbaa !5
+ store i32 %24, i32* %s, align 4, !tbaa !5
+ %25 = bitcast i32* %s to i8*
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* %25) #5
+ %26 = bitcast double addrspace(4)** %t to i8*
+ call void @llvm.lifetime.end.p0i8(i64 8, i8* %26) #5
+ %27 = bitcast %struct._ZTS5State.State addrspace(4)** %z to i8*
+ call void @llvm.lifetime.end.p0i8(i64 8, i8* %27) #5
+ %28 = bitcast i32 addrspace(4)** %y to i8*
+ call void @llvm.lifetime.end.p0i8(i64 8, i8* %28) #5
+ %29 = bitcast float addrspace(4)** %x to i8*
+ call void @llvm.lifetime.end.p0i8(i64 8, i8* %29) #5
+ ret void
+}
+
+; Function Attrs: nounwind willreturn
+declare float addrspace(4)* @llvm.ptr.annotation.p4f32.p1i8(float addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #4
+
+; Function Attrs: nounwind willreturn
+declare i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #4
+
+; Function Attrs: nounwind willreturn
+declare %struct._ZTS5State.State addrspace(4)* @llvm.ptr.annotation.p4s_struct._ZTS5State.States(%struct._ZTS5State.State addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #4
+
+; Function Attrs: nounwind willreturn
+declare double addrspace(4)* @llvm.ptr.annotation.p4f64.p1i8(double addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #4
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "sycl-module-id"="/tmp/lsu.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind willreturn }
+attributes #2 = { inlinehint norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind willreturn }
+attributes #5 = { nounwind }
+attributes #6 = { readnone }
+
+!llvm.module.flags = !{!0}
+!opencl.spir.version = !{!1}
+!spirv.Source = !{!2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 2}
+!2 = !{i32 4, i32 100000}
+!3 = !{!"clang version 11.0.0"}
+!4 = !{}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"any pointer", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/fpga_lsu_function_call.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/fpga_lsu_function_call.ll
new file mode 100644
index 0000000000000..254ea80f8a8d8
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/fpga_lsu_function_call.ll
@@ -0,0 +1,36 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_fpga_memory_accesses %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV
+
+; CHECK-SPIRV-DAG: OpCapability FPGAMemoryAccessesINTEL
+; CHECK-SPIRV-DAG: OpExtension "SPV_INTEL_fpga_memory_accesses"
+; CHECK-SPIRV: OpDecorate %[[#DecTarget:]] BurstCoalesceINTEL
+; CHECK-SPIRV: %[[#DecTarget]] = OpFunctionCall
+
+; ModuleID = 'test.bc'
+source_filename = "llvm-link"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%struct.MyStruct = type { i32 }
+
+$_ZN8MyStructaSERKS_ = comdat any
+
+$accessor = comdat any
+
+ at .str.1 = private unnamed_addr addrspace(1) constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata"
+ at .str.2 = private unnamed_addr addrspace(1) constant [11 x i8] c"{params:1}\00", section "llvm.metadata"
+
+define spir_func void @foo(ptr %Ptr, ptr byval(%struct.MyStruct) align 4 %Val) {
+entry:
+ %Ptr.ascast = addrspacecast ptr %Ptr to ptr addrspace(4)
+ %Val.ascast = addrspacecast ptr %Val to ptr addrspace(4)
+ %call = call spir_func noundef ptr addrspace(4) @accessor(ptr addrspace(4) %Ptr.ascast)
+ %0 = call ptr addrspace(4) @llvm.ptr.annotation.p4.p1(ptr addrspace(4) %call, ptr addrspace(1) @.str.2, ptr addrspace(1) @.str.1, i32 0, ptr addrspace(1) null)
+ %call1 = call spir_func ptr addrspace(4) @_ZN8MyStructaSERKS_(ptr addrspace(4) %0, ptr addrspace(4) %Val.ascast)
+ ret void
+}
+
+declare ptr addrspace(4) @llvm.ptr.annotation.p4.p1(ptr addrspace(4), ptr addrspace(1), ptr addrspace(1), i32, ptr addrspace(1))
+
+declare spir_func ptr addrspace(4) @_ZN8MyStructaSERKS_(ptr addrspace(4) %this, ptr addrspace(4) %op)
+
+declare spir_func ptr addrspace(4) @accessor(ptr addrspace(4) %this)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/intel_fpga_lsu_optimized.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/intel_fpga_lsu_optimized.ll
new file mode 100644
index 0000000000000..60f8173eb4bdd
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_fpga_memory_accesses/intel_fpga_lsu_optimized.ll
@@ -0,0 +1,152 @@
+; LLVM IR generated by Intel SYCL Clang compiler (https://github.com/intel/llvm)
+; SYCL source code can be found below:
+
+; #include <CL/sycl.hpp>
+; #include <CL/sycl/intel/fpga_extensions.hpp>
+;
+; int main() {
+; cl::sycl::queue Queue{cl::sycl::intel::fpga_emulator_selector{}};
+;
+; {
+; cl::sycl::buffer<int, 1> output_buffer(output_data, 1);
+; cl::sycl::buffer<int, 1> input_buffer(input_data, 1);
+;
+; Queue.submit([&](cl::sycl::handler &cgh) {
+; auto output_accessor =
+; output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
+; auto input_accessor =
+; input_buffer.get_access<cl::sycl::access::mode::read>(cgh);
+;
+; cgh.single_task<class kernel>([=] {
+; auto input_ptr = input_accessor.get_pointer();
+; auto output_ptr = output_accessor.get_pointer();
+;
+; using PrefetchingLSU =
+; cl::sycl::intel::lsu<cl::sycl::intel::prefetch<true>,
+; cl::sycl::intel::statically_coalesce<false>>;
+;
+; using BurstCoalescedLSU =
+; cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
+; cl::sycl::intel::statically_coalesce<false>>;
+;
+; using CachingLSU =
+; cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
+; cl::sycl::intel::cache<1024>,
+; cl::sycl::intel::statically_coalesce<false>>;
+;
+; using PipelinedLSU = cl::sycl::intel::lsu<>;
+;
+; int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
+; int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
+;
+; BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
+; PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y
+; });
+; });
+; }
+;
+; return 0;
+; }
+
+; Check that translation of optimized IR doesn't crash:
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_fpga_memory_accesses -o %t.spv
+
+; Check that reverse translation restore ptr.annotations correctly:
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
+%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
+
+$"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel" = comdat any
+
+ at .str = private unnamed_addr addrspace(1) constant [26 x i8] c"{params:12}{cache-size:0}\00", section "llvm.metadata"
+ at .str.1 = private unnamed_addr addrspace(1) constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata"
+ at .str.2 = private unnamed_addr addrspace(1) constant [28 x i8] c"{params:7}{cache-size:1024}\00", section "llvm.metadata"
+ at .str.3 = private unnamed_addr addrspace(1) constant [25 x i8] c"{params:5}{cache-size:0}\00", section "llvm.metadata"
+ at .str.4 = private unnamed_addr addrspace(1) constant [25 x i8] c"{params:0}{cache-size:0}\00", section "llvm.metadata"
+
+; CHECK-LLVM: [[PTR_i27_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:12}
+; CHECK-LLVM: [[PTR_i15_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:7}{cache-size:1024}
+; CHECK-LLVM: [[PTR_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:5}
+; CHECK-LLVM: [[PTR_i_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:0}{cache-size:0}
+
+; Function Attrs: norecurse
+define weak_odr dso_local spir_kernel void @"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel"(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_buffer_location !8 {
+entry:
+ %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0
+ %1 = load i64, i64* %0, align 8
+ %add.ptr.i27 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %1
+ %2 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0
+ %3 = load i64, i64* %2, align 8
+ %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %3
+ %4 = addrspacecast i32 addrspace(1)* %add.ptr.i27 to i32 addrspace(4)*
+ %5 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %4, i8 addrspace(1)* getelementptr inbounds ([26 x i8], [26 x i8] addrspace(1)* @.str, i64 0, i64 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i64 0, i64 0), i32 0, i8 addrspace(1)* null) #2
+ %6 = load i32, i32 addrspace(4)* %5, align 4, !tbaa !9
+ ; CHECK-LLVM: [[PTR_i27:[%0-9a-z.]+]] = getelementptr inbounds i32, ptr addrspace(1) {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
+ ; CHECK-LLVM: [[PTR_i:[%0-9a-z.]+]] = getelementptr inbounds i32, ptr addrspace(1) {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
+ ; CHECK-LLVM: [[PTR_i27_AS_CAST:[%0-9a-z.]+]] = addrspacecast ptr addrspace(1) [[PTR_i27]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) [[PTR_i27_AS_CAST]], ptr [[PTR_i27_ANNOT_STR]]
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC2:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL_BC]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_RESULT_LOAD:[%0-9a-z.]+]] = load i32, ptr addrspace(4) [[PTR_ANNOT_CALL_BC2]]
+ %add.ptr.i15.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i27, i64 1
+ %7 = addrspacecast i32 addrspace(1)* %add.ptr.i15.i to i32 addrspace(4)*
+ %8 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %7, i8 addrspace(1)* getelementptr inbounds ([28 x i8], [28 x i8] addrspace(1)* @.str.2, i64 0, i64 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i64 0, i64 0), i32 0, i8 addrspace(1)* null) #2
+ %9 = load i32, i32 addrspace(4)* %8, align 4, !tbaa !9
+ ; CHECK-LLVM: [[PTR_i15_i:[%0-9a-z.]+]] = getelementptr inbounds i32, ptr addrspace(1) {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
+ ; CHECK-LLVM: [[PTR_i15_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast ptr addrspace(1) [[PTR_i15_i]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) [[PTR_i15_i_AS_CAST]], ptr [[PTR_i15_i_ANNOT_STR]]
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC2:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL_BC]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_RESULT_LOAD_1:[%0-9a-z.]+]] = load i32, ptr addrspace(4) [[PTR_ANNOT_CALL_BC2]]
+ %10 = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)*
+ %11 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %10, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str.3, i64 0, i64 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i64 0, i64 0), i32 0, i8 addrspace(1)* null) #2
+ store i32 %6, i32 addrspace(4)* %11, align 4, !tbaa !9
+ ; CHECK-LLVM: [[PTR_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast ptr addrspace(1) [[PTR_i]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) [[PTR_i_AS_CAST]], ptr [[PTR_i_ANNOT_STR]]
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC2:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL_BC]] to ptr addrspace(4)
+ ; CHECK-LLVM: store i32 [[PTR_RESULT_LOAD]], ptr addrspace(4) [[PTR_ANNOT_CALL_BC2]]
+ %add.ptr.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 1
+ %12 = addrspacecast i32 addrspace(1)* %add.ptr.i.i to i32 addrspace(4)*
+ %13 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)* %12, i8 addrspace(1)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(1)* @.str.4, i64 0, i64 0), i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @.str.1, i64 0, i64 0), i32 0, i8 addrspace(1)* null) #2
+ store i32 %9, i32 addrspace(4)* %13, align 4, !tbaa !9
+ ; CHECK-LLVM: [[PTR_i_i:[%0-9a-z.]+]] = getelementptr inbounds i32, ptr addrspace(1) {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
+ ; CHECK-LLVM: [[PTR_i_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast ptr addrspace(1) [[PTR_i_i]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call ptr addrspace(4) @llvm.ptr.annotation.p4.p0(ptr addrspace(4) [[PTR_i_i_AS_CAST]], ptr [[PTR_i_i_ANNOT_STR]]
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL]] to ptr addrspace(4)
+ ; CHECK-LLVM: [[PTR_ANNOT_CALL_BC2:[%0-9a-z.]+]] = bitcast ptr addrspace(4) [[PTR_ANNOT_CALL_BC]] to ptr addrspace(4)
+ ; CHECK-LLVM: store i32 [[PTR_RESULT_LOAD_1]], ptr addrspace(4) [[PTR_ANNOT_CALL_BC2]]
+ ret void
+}
+
+; Function Attrs: nounwind willreturn
+declare i32 addrspace(4)* @llvm.ptr.annotation.p4i32.p1i8(i32 addrspace(4)*, i8 addrspace(1)*, i8 addrspace(1)*, i32, i8 addrspace(1)*) #1
+
+attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="fpga_lsu.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind willreturn }
+attributes #2 = { nounwind readnone }
+
+!opencl.spir.version = !{!0}
+!spirv.Source = !{!1}
+!llvm.ident = !{!2}
+!llvm.module.flags = !{!3}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
+!2 = !{!"clang version 12.0.0"}
+!3 = !{i32 1, !"wchar_size", i32 4}
+!4 = !{i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0}
+!5 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
+!6 = !{!"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>", !"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>"}
+!7 = !{!"", !"", !"", !"", !"", !"", !"", !""}
+!8 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !11, i64 0}
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C++ TBAA"}
More information about the llvm-commits
mailing list