[llvm] cb13e92 - [BPF] Attribute preserve_static_offset for structs

Eduard Zingerman via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 30 09:46:29 PST 2023


Author: Eduard Zingerman
Date: 2023-11-30T19:45:03+02:00
New Revision: cb13e9286b6d4e384b5d4203e853d44e2eff0f0f

URL: https://github.com/llvm/llvm-project/commit/cb13e9286b6d4e384b5d4203e853d44e2eff0f0f
DIFF: https://github.com/llvm/llvm-project/commit/cb13e9286b6d4e384b5d4203e853d44e2eff0f0f.diff

LOG: [BPF] Attribute preserve_static_offset for structs

This commit adds a new BPF specific structure attribute
`__attribute__((preserve_static_offset))` and a pass to deal with it.

This attribute may be attached to a struct or union declaration, where
it notifies the compiler that this structure is a "context" structure.
The following limitations apply to context structures:
- runtime environment might patch access to the fields of this type by
  updating the field offset;

  BPF verifier limits access patterns allowed for certain data
  types. E.g. `struct __sk_buff` and `struct bpf_sock_ops`. For these
  types only `LD/ST <reg> <static-offset>` memory loads and stores are
  allowed.

  This is so because offsets of the fields of these structures do not
  match real offsets in the running kernel. During BPF program
  load/verification loads and stores to the fields of these types are
  rewritten so that offsets match real offsets. For this rewrite to
  happen static offsets have to be encoded in the instructions.

  See `kernel/bpf/verifier.c:convert_ctx_access` function in the Linux
  kernel source tree for details.

- runtime environment might disallow access to the field of the type
  through modified pointers.

  During BPF program verification a tag `PTR_TO_CTX` is tracked for
  register values. If a register with such a tag is modified, BPF
  programs are not allowed to read or write memory using that register. See
  kernel/bpf/verifier.c:check_mem_access function in the Linux kernel
  source tree for details.

Access to the structure fields is translated to IR as a sequence:
- `(load (getelementptr %ptr %offset))` or
- `(store (getelementptr %ptr %offset))`

During instruction selection phase such sequences are translated as a
single load instruction with embedded offset, e.g. `LDW %ptr, %offset`,
which matches access pattern necessary for the restricted
set of types described above (when `%offset` is static).

Multiple optimizer passes might separate these instructions, this
includes:
- SimplifyCFGPass (sinking)
- InstCombine (sinking)
- GVN (hoisting)

The `preserve_static_offset` attribute marks structures for which the
following transformations happen:
- at the early IR processing stage:
  - `(load (getelementptr ...))` replaced by call to intrinsic
    `llvm.bpf.getelementptr.and.load`;
  - `(store (getelementptr ...))` replaced by call to intrinsic
    `llvm.bpf.getelementptr.and.store`;
- at the late IR processing stage this modification is undone.

Such handling prevents various optimizer passes from generating
sequences of instructions that would be rejected by BPF verifier.

The __attribute__((preserve_static_offset)) has priority over
__attribute__((preserve_access_index)). When the preserve_static_offset
attribute is present, preserve access index transformations are not
applied.

This addresses the issue reported by the following thread:

https://lore.kernel.org/bpf/CAA-VZPmxh8o8EBcJ=m-DH4ytcxDFmo0JKsm1p1gf40kS0CE3NQ@mail.gmail.com/T/#m4b9ce2ce73b34f34172328f975235fc6f19841b6

Differential Revision: https://reviews.llvm.org/D133361

Added: 
    clang/test/CodeGen/bpf-preserve-static-offset-arr.c
    clang/test/CodeGen/bpf-preserve-static-offset-bitfield.c
    clang/test/CodeGen/bpf-preserve-static-offset-lvalue.c
    clang/test/CodeGen/bpf-preserve-static-offset-non-bpf.c
    clang/test/CodeGen/bpf-preserve-static-offset-pai.c
    clang/test/Sema/bpf-attr-preserve-static-offset-warns-nonbpf.c
    clang/test/Sema/bpf-attr-preserve-static-offset-warns.c
    clang/test/Sema/bpf-attr-preserve-static-offset.c
    llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp
    llvm/test/CodeGen/BPF/preserve-static-offset/load-align.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-arr-pai.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-atomic.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-2.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-oob.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8-oob.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8-type-mismatch.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-chain.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-inline.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-non-const.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-ptr-pai.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-simple.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-struct-pai.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-align.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain-oob.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain-u8.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-simple.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-volatile.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-union-pai.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-unroll-inline.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-unroll.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-volatile.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/load-zero.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-align.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-atomic.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-2.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-oob.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-u8-oob.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-u8.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-chain.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-pai.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-simple.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-align.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain-oob.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain-u8.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-simple.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-volatile.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-unroll-inline.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-volatile.ll
    llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll

Modified: 
    clang/include/clang/Basic/Attr.td
    clang/include/clang/Basic/AttrDocs.td
    clang/lib/CodeGen/CGExpr.cpp
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/test/Misc/pragma-attribute-supported-attributes-list.test
    llvm/include/llvm/IR/Intrinsics.td
    llvm/include/llvm/IR/IntrinsicsBPF.td
    llvm/lib/Target/BPF/BPF.h
    llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
    llvm/lib/Target/BPF/BPFCORE.h
    llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
    llvm/lib/Target/BPF/BPFTargetMachine.cpp
    llvm/lib/Target/BPF/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 1800f584c7e1088..121ed203829cec4 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2024,6 +2024,14 @@ def BPFPreserveAccessIndex : InheritableAttr,
   let LangOpts = [COnly];
 }
 
+def BPFPreserveStaticOffset : InheritableAttr,
+                              TargetSpecificAttr<TargetBPF>  {
+  let Spellings = [Clang<"preserve_static_offset">];
+  let Subjects = SubjectList<[Record], ErrorDiag>;
+  let Documentation = [BPFPreserveStaticOffsetDocs];
+  let LangOpts = [COnly];
+}
+
 def BTFDeclTag : InheritableAttr {
   let Spellings = [Clang<"btf_decl_tag">];
   let Args = [StringArgument<"BTFDeclTag">];

diff  --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index f2c4eb51b443ddb..dafc811c5225159 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2199,6 +2199,43 @@ preserving struct or union member access debuginfo indices of this
 struct or union, similar to clang ``__builtin_preserve_access_index()``.
   }];
 }
+
+def BPFPreserveStaticOffsetDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+Clang supports the ``__attribute__((preserve_static_offset))``
+attribute for the BPF target. This attribute may be attached to a
+struct or union declaration. Reading or writing fields of types having
+such annotation is guaranteed to generate LDX/ST/STX instruction with
+offset corresponding to the field.
+
+For example:
+
+.. code-block:: c
+
+  struct foo {
+    int a;
+    int b;
+  };
+
+  struct bar {
+    int a;
+    struct foo b;
+  } __attribute__((preserve_static_offset));
+
+  void buz(struct bar *g) {
+    g->b.a = 42;
+  }
+
+The assignment to ``g``'s field would produce an ST instruction with
+offset 4: ``*(u32)(r1 + 4) = 42;``.
+
+Without this attribute generated instructions might be different,
+depending on optimizations behavior. E.g. the example above could be
+rewritten as ``r1 += 4; *(u32)(r1 + 0) = 42;``.
+  }];
+}
+
 def BTFDeclTagDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{

diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9d1f1a58f9e1c5e..69cf7f76be9a709 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3833,6 +3833,33 @@ static QualType getFixedSizeElementType(const ASTContext &ctx,
   return eltType;
 }
 
+static bool hasBPFPreserveStaticOffset(const RecordDecl *D) {
+  return D && D->hasAttr<BPFPreserveStaticOffsetAttr>();
+}
+
+static bool hasBPFPreserveStaticOffset(const Expr *E) {
+  if (!E)
+    return false;
+  QualType PointeeType = E->getType()->getPointeeType();
+  if (PointeeType.isNull())
+    return false;
+  if (const auto *BaseDecl = PointeeType->getAsRecordDecl())
+    return hasBPFPreserveStaticOffset(BaseDecl);
+  return false;
+}
+
+// Wraps Addr with a call to llvm.preserve.static.offset intrinsic.
+static Address wrapWithBPFPreserveStaticOffset(CodeGenFunction &CGF,
+                                               Address &Addr) {
+  if (!CGF.getTarget().getTriple().isBPF())
+    return Addr;
+
+  llvm::Function *Fn =
+      CGF.CGM.getIntrinsic(llvm::Intrinsic::preserve_static_offset);
+  llvm::CallInst *Call = CGF.Builder.CreateCall(Fn, {Addr.getPointer()});
+  return Address(Call, Addr.getElementType(), Addr.getAlignment());
+}
+
 /// Given an array base, check whether its member access belongs to a record
 /// with preserve_access_index attribute or not.
 static bool IsPreserveAIArrayBase(CodeGenFunction &CGF, const Expr *ArrayBase) {
@@ -3894,6 +3921,9 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
   CharUnits eltAlign =
     getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize);
 
+  if (hasBPFPreserveStaticOffset(Base))
+    addr = wrapWithBPFPreserveStaticOffset(CGF, addr);
+
   llvm::Value *eltPtr;
   auto LastIndex = dyn_cast<llvm::ConstantInt>(indices.back());
   if (!LastIndex ||
@@ -4522,6 +4552,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
     Address Addr = base.getAddress(*this);
     unsigned Idx = RL.getLLVMFieldNo(field);
     const RecordDecl *rec = field->getParent();
+    if (hasBPFPreserveStaticOffset(rec))
+      Addr = wrapWithBPFPreserveStaticOffset(*this, Addr);
     if (!UseVolatile) {
       if (!IsInPreservedAIRegion &&
           (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) {
@@ -4594,6 +4626,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
   }
 
   Address addr = base.getAddress(*this);
+  if (hasBPFPreserveStaticOffset(rec))
+    addr = wrapWithBPFPreserveStaticOffset(*this, addr);
   if (auto *ClassDef = dyn_cast<CXXRecordDecl>(rec)) {
     if (CGM.getCodeGenOpts().StrictVTablePointers &&
         ClassDef->isDynamicClass()) {

diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 87c78d742d0ff4d..a345978bb8701ce 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -9036,6 +9036,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_BPFPreserveAccessIndex:
     handleBPFPreserveAccessIndexAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_BPFPreserveStaticOffset:
+    handleSimpleAttribute<BPFPreserveStaticOffsetAttr>(S, D, AL);
+    break;
   case ParsedAttr::AT_BTFDeclTag:
     handleBTFDeclTagAttr(S, D, AL);
     break;

diff  --git a/clang/test/CodeGen/bpf-preserve-static-offset-arr.c b/clang/test/CodeGen/bpf-preserve-static-offset-arr.c
new file mode 100644
index 000000000000000..295bd2919fc6512
--- /dev/null
+++ b/clang/test/CodeGen/bpf-preserve-static-offset-arr.c
@@ -0,0 +1,33 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: bpf-registered-target
+// RUN: %clang -cc1 -triple bpf -disable-llvm-passes -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s
+
+// Check that call to preserve.static.offset is generated when array
+// member of a struct marked with __attribute__((preserve_static_offset))
+// is accessed.
+
+#define __ctx __attribute__((preserve_static_offset))
+
+struct foo {
+  struct {
+    int a;
+  } b[7];
+} __ctx;
+
+// CHECK-LABEL: define dso_local i32 @arr_access
+// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.preserve.static.offset(ptr [[TMP0]])
+// CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [7 x %struct.anon], ptr [[B]], i64 0, i64 2
+// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[ARRAYIDX]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int arr_access(struct foo *p) {
+  return p->b[2].a;
+}

diff  --git a/clang/test/CodeGen/bpf-preserve-static-offset-bitfield.c b/clang/test/CodeGen/bpf-preserve-static-offset-bitfield.c
new file mode 100644
index 000000000000000..e4fd2eeeeb66342
--- /dev/null
+++ b/clang/test/CodeGen/bpf-preserve-static-offset-bitfield.c
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: bpf-registered-target
+// RUN: %clang -cc1 -triple bpf -disable-llvm-passes -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s
+
+// Check that call to preserve.static.offset is generated when bitfield
+// from a struct marked with __attribute__((preserve_static_offset)) is
+// accessed.
+
+#define __ctx __attribute__((preserve_static_offset))
+
+struct foo {
+  unsigned a:1;
+} __ctx;
+
+// CHECK-LABEL: define dso_local void @lvalue_bitfield
+// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.preserve.static.offset(ptr [[TMP0]])
+// CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -2
+// CHECK-NEXT:    [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
+// CHECK-NEXT:    store i8 [[BF_SET]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    ret void
+//
+void lvalue_bitfield(struct foo *p) {
+  p->a = 1;
+}

diff  --git a/clang/test/CodeGen/bpf-preserve-static-offset-lvalue.c b/clang/test/CodeGen/bpf-preserve-static-offset-lvalue.c
new file mode 100644
index 000000000000000..4f0c359366f5a54
--- /dev/null
+++ b/clang/test/CodeGen/bpf-preserve-static-offset-lvalue.c
@@ -0,0 +1,28 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: bpf-registered-target
+// RUN: %clang -cc1 -triple bpf -disable-llvm-passes -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s
+
+// Check that call to preserve.static.offset is generated when field of
+// a struct marked with __attribute__((preserve_static_offset)) is accessed.
+
+#define __ctx __attribute__((preserve_static_offset))
+
+struct foo {
+  int a;
+} __ctx;
+
+// CHECK-LABEL: define dso_local void @lvalue
+// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.preserve.static.offset(ptr [[TMP0]])
+// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT:    store i32 42, ptr [[A]], align 4
+// CHECK-NEXT:    ret void
+//
+void lvalue(struct foo *p) {
+  p->a = 42;
+}

diff  --git a/clang/test/CodeGen/bpf-preserve-static-offset-non-bpf.c b/clang/test/CodeGen/bpf-preserve-static-offset-non-bpf.c
new file mode 100644
index 000000000000000..3fe8d2517fe30af
--- /dev/null
+++ b/clang/test/CodeGen/bpf-preserve-static-offset-non-bpf.c
@@ -0,0 +1,18 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang -cc1 -triple x86_64 -disable-llvm-passes -S -emit-llvm  -o - %s \
+// RUN: | FileCheck %s
+
+// Verify that __attribute__((preserve_static_offset))
+// has no effect for non-BPF target.
+
+#define __ctx __attribute__((preserve_static_offset))
+
+struct foo {
+  int a;
+} __ctx;
+
+// CHECK-NOT: @llvm_preserve_static_offset
+
+int bar(struct foo *p) {
+  return p->a;
+}

diff  --git a/clang/test/CodeGen/bpf-preserve-static-offset-pai.c b/clang/test/CodeGen/bpf-preserve-static-offset-pai.c
new file mode 100644
index 000000000000000..df1f33b1a66415f
--- /dev/null
+++ b/clang/test/CodeGen/bpf-preserve-static-offset-pai.c
@@ -0,0 +1,29 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: bpf-registered-target
+// RUN: %clang -cc1 -triple bpf -disable-llvm-passes -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s
+
+// Verify that preserve_static_offset does not interfere with
+// preserve_access_index at IR generation stage.
+
+#define __ctx __attribute__((preserve_static_offset))
+#define __pai __attribute__((preserve_access_index))
+
+struct foo {
+  int a;
+} __ctx __pai;
+
+// CHECK-LABEL: define dso_local i32 @bar
+// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.preserve.static.offset(ptr [[TMP0]])
+// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int bar(struct foo *p) {
+  return p->a;
+}

diff  --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index dd91f4f88ad685b..707fc8875089f7e 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -23,6 +23,7 @@
 // CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable))
 // CHECK-NEXT: AvailableOnlyInDefaultEvalMethod (SubjectMatchRule_type_alias)
 // CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record)
+// CHECK-NEXT: BPFPreserveStaticOffset (SubjectMatchRule_record)
 // CHECK-NEXT: BTFDeclTag (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record, SubjectMatchRule_field, SubjectMatchRule_type_alias)
 // CHECK-NEXT: BuiltinAlias (SubjectMatchRule_function)
 // CHECK-NEXT: CFAuditedTransfer (SubjectMatchRule_function)

diff  --git a/clang/test/Sema/bpf-attr-preserve-static-offset-warns-nonbpf.c b/clang/test/Sema/bpf-attr-preserve-static-offset-warns-nonbpf.c
new file mode 100644
index 000000000000000..d543e6f999529d0
--- /dev/null
+++ b/clang/test/Sema/bpf-attr-preserve-static-offset-warns-nonbpf.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define __pso __attribute__((preserve_static_offset))
+
+struct foo { int a; } __pso; // expected-warning{{unknown attribute}}
+union quux { int a; } __pso; // expected-warning{{unknown attribute}}

diff  --git a/clang/test/Sema/bpf-attr-preserve-static-offset-warns.c b/clang/test/Sema/bpf-attr-preserve-static-offset-warns.c
new file mode 100644
index 000000000000000..1067ebe8f82b051
--- /dev/null
+++ b/clang/test/Sema/bpf-attr-preserve-static-offset-warns.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -triple bpf-pc-linux-gnu %s
+
+#define __pso __attribute__((preserve_static_offset))
+
+// These are correct usages.
+struct foo { int a; } __pso;
+union quux { int a; } __pso;
+struct doug { int a; } __pso __attribute__((packed));
+
+// Rest are incorrect usages.
+typedef int bar __pso;    // expected-error{{attribute only applies to}}
+struct goo {
+  int a __pso;            // expected-error{{attribute only applies to}}
+};
+int g __pso;              // expected-error{{attribute only applies to}}
+__pso void ffunc1(void);  // expected-error{{attribute only applies to}}
+void ffunc2(int a __pso); // expected-error{{attribute only applies to}}
+void ffunc3(void) {
+  int a __pso;            // expected-error{{attribute only applies to}}
+}
+
+struct buz { int a; } __attribute__((preserve_static_offset("hello"))); // \
+  expected-error{{attribute takes no arguments}}

diff  --git a/clang/test/Sema/bpf-attr-preserve-static-offset.c b/clang/test/Sema/bpf-attr-preserve-static-offset.c
new file mode 100644
index 000000000000000..5f53469869f3d7d
--- /dev/null
+++ b/clang/test/Sema/bpf-attr-preserve-static-offset.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -fsyntax-only -ast-dump -triple bpf-pc-linux-gnu %s | FileCheck %s
+
+// The 'preserve_static_offset' attribute should be propagated to
+// inline declarations (foo's 'b', 'bb', 'c' but not 'd').
+//
+// CHECK:      RecordDecl {{.*}} struct foo definition
+// CHECK-NEXT:   BPFPreserveStaticOffsetAttr
+// CHECK-NEXT:   FieldDecl {{.*}} a
+// CHECK-NEXT:   RecordDecl {{.*}} struct definition
+// CHECK-NEXT:     FieldDecl {{.*}} aa
+// CHECK-NEXT:   FieldDecl {{.*}} b
+// CHECK-NEXT: RecordDecl {{.*}} union bar definition
+// CHECK-NEXT:   BPFPreserveStaticOffsetAttr
+// CHECK-NEXT:   FieldDecl {{.*}} a
+// CHECK-NEXT:   FieldDecl {{.*}} b
+
+struct foo {
+  int a;
+  struct {
+    int aa;
+  } b;
+} __attribute__((preserve_static_offset));
+
+union bar {
+  int a;
+  long b;
+} __attribute__((preserve_static_offset));

diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 060e964f77bf712..b54c697296b20ae 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2469,6 +2469,10 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
                                                  [IntrNoMem,
                                                   ImmArg<ArgIndex<1>>,
                                                   ImmArg<ArgIndex<2>>]>;
+def int_preserve_static_offset : DefaultAttrsIntrinsic<[llvm_ptr_ty],
+                                                       [llvm_ptr_ty],
+                                                       [IntrNoMem, IntrSpeculatable,
+                                                        ReadNone <ArgIndex<0>>]>;
 
 //===------------ Intrinsics to perform common vector shuffles ------------===//
 

diff  --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index 8916b60d2be39e1..c7ec0916f1d1f8f 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -37,4 +37,43 @@ let TargetPrefix = "bpf" in {  // All intrinsics start with "llvm.bpf."
   def int_bpf_compare : ClangBuiltin<"__builtin_bpf_compare">,
               Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_anyint_ty, llvm_anyint_ty],
               [IntrNoMem]>;
+  def int_bpf_getelementptr_and_load : ClangBuiltin<"__builtin_bpf_getelementptr_and_load">,
+              Intrinsic<[llvm_any_ty],
+                        [llvm_ptr_ty,     // base ptr for getelementptr
+                         llvm_i1_ty,      // volatile
+                         llvm_i8_ty,      // atomic order
+                         llvm_i8_ty,      // synscope id
+                         llvm_i8_ty,      // alignment
+                         llvm_i1_ty,      // inbounds
+                         llvm_vararg_ty], // indices for getelementptr insn
+                        [IntrNoCallback,
+                         IntrNoFree,
+                         IntrWillReturn,
+                         NoCapture <ArgIndex<0>>,
+                         ImmArg    <ArgIndex<1>>, // volatile
+                         ImmArg    <ArgIndex<2>>, // atomic order
+                         ImmArg    <ArgIndex<3>>, // synscope id
+                         ImmArg    <ArgIndex<4>>, // alignment
+                         ImmArg    <ArgIndex<5>>, // inbounds
+                        ]>;
+  def int_bpf_getelementptr_and_store : ClangBuiltin<"__builtin_bpf_getelementptr_and_store">,
+              Intrinsic<[],
+                        [llvm_any_ty,     // value to store
+                         llvm_ptr_ty,     // base ptr for getelementptr
+                         llvm_i1_ty,      // volatile
+                         llvm_i8_ty,      // atomic order
+                         llvm_i8_ty,      // syncscope id
+                         llvm_i8_ty,      // alignment
+                         llvm_i1_ty,      // inbounds
+                         llvm_vararg_ty], // indexes for getelementptr insn
+                        [IntrNoCallback,
+                         IntrNoFree,
+                         IntrWillReturn,
+                         NoCapture <ArgIndex<1>>,
+                         ImmArg    <ArgIndex<2>>, // volatile
+                         ImmArg    <ArgIndex<3>>, // atomic order
+                         ImmArg    <ArgIndex<4>>, // syncscope id
+                         ImmArg    <ArgIndex<5>>, // alignment
+                         ImmArg    <ArgIndex<6>>, // inbounds
+                        ]>;
 }

diff  --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 1f539d3270b712e..436cd62c2581381 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -10,6 +10,7 @@
 #define LLVM_LIB_TARGET_BPF_BPF_H
 
 #include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Target/TargetMachine.h"
@@ -62,6 +63,24 @@ class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
 public:
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
+
+class BPFPreserveStaticOffsetPass
+    : public PassInfoMixin<BPFPreserveStaticOffsetPass> {
+  bool AllowPartial;
+
+public:
+  BPFPreserveStaticOffsetPass(bool AllowPartial) : AllowPartial(AllowPartial) {}
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
+
+  static std::pair<GetElementPtrInst *, LoadInst *>
+  reconstructLoad(CallInst *Call);
+
+  static std::pair<GetElementPtrInst *, StoreInst *>
+  reconstructStore(CallInst *Call);
+};
+
 } // namespace llvm
 
 #endif

diff  --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 1895d15c1f55330..9634c16a30dc6a7 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -172,8 +172,6 @@ class BPFAbstractMemberAccess final {
   bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI,
                       const MDNode *ChildMeta);
   bool removePreserveAccessIndexIntrinsic(Function &F);
-  void replaceWithGEP(std::vector<CallInst *> &CallList,
-                      uint32_t NumOfZerosIndex, uint32_t DIIndex);
   bool HasPreserveFieldInfoCall(CallInfoStack &CallStack);
   void GetStorageBitRange(DIDerivedType *MemberTy, Align RecordAlignment,
                           uint32_t &StartBitOffset, uint32_t &EndBitOffset);
@@ -185,7 +183,6 @@ class BPFAbstractMemberAccess final {
                                  std::string &AccessKey, MDNode *&BaseMeta);
   MDNode *computeAccessKey(CallInst *Call, CallInfo &CInfo,
                            std::string &AccessKey, bool &IsInt32Ret);
-  uint64_t getConstant(const Value *IndexValue);
   bool transformGEPChain(CallInst *Call, CallInfo &CInfo);
 };
 
@@ -326,6 +323,12 @@ static Type *getBaseElementType(const CallInst *Call) {
   return Call->getParamElementType(0);
 }
 
+static uint64_t getConstant(const Value *IndexValue) {
+  const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue);
+  assert(CV);
+  return CV->getValue().getZExtValue();
+}
+
 /// Check whether a call is a preserve_*_access_index intrinsic call or not.
 bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
                                                           CallInfo &CInfo) {
@@ -410,26 +413,36 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
   return false;
 }
 
-void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList,
-                                             uint32_t DimensionIndex,
-                                             uint32_t GEPIndex) {
-  for (auto *Call : CallList) {
-    uint32_t Dimension = 1;
-    if (DimensionIndex > 0)
-      Dimension = getConstant(Call->getArgOperand(DimensionIndex));
-
-    Constant *Zero =
-        ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0);
-    SmallVector<Value *, 4> IdxList;
-    for (unsigned I = 0; I < Dimension; ++I)
-      IdxList.push_back(Zero);
-    IdxList.push_back(Call->getArgOperand(GEPIndex));
-
-    auto *GEP = GetElementPtrInst::CreateInBounds(
-        getBaseElementType(Call), Call->getArgOperand(0), IdxList, "", Call);
-    Call->replaceAllUsesWith(GEP);
-    Call->eraseFromParent();
-  }
+static void replaceWithGEP(CallInst *Call, uint32_t DimensionIndex,
+                           uint32_t GEPIndex) {
+  uint32_t Dimension = 1;
+  if (DimensionIndex > 0)
+    Dimension = getConstant(Call->getArgOperand(DimensionIndex));
+
+  Constant *Zero =
+      ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0);
+  SmallVector<Value *, 4> IdxList;
+  for (unsigned I = 0; I < Dimension; ++I)
+    IdxList.push_back(Zero);
+  IdxList.push_back(Call->getArgOperand(GEPIndex));
+
+  auto *GEP = GetElementPtrInst::CreateInBounds(
+      getBaseElementType(Call), Call->getArgOperand(0), IdxList, "", Call);
+  Call->replaceAllUsesWith(GEP);
+  Call->eraseFromParent();
+}
+
+void BPFCoreSharedInfo::removeArrayAccessCall(CallInst *Call) {
+  replaceWithGEP(Call, 1, 2);
+}
+
+void BPFCoreSharedInfo::removeStructAccessCall(CallInst *Call) {
+  replaceWithGEP(Call, 0, 1);
+}
+
+void BPFCoreSharedInfo::removeUnionAccessCall(CallInst *Call) {
+  Call->replaceAllUsesWith(Call->getArgOperand(0));
+  Call->eraseFromParent();
 }
 
 bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Function &F) {
@@ -464,12 +477,12 @@ bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Function &F) {
   // . addr = preserve_struct_access_index(base, gep_index, di_index)
   //   is transformed to
   //     addr = GEP(base, 0, gep_index)
-  replaceWithGEP(PreserveArrayIndexCalls, 1, 2);
-  replaceWithGEP(PreserveStructIndexCalls, 0, 1);
-  for (auto *Call : PreserveUnionIndexCalls) {
-    Call->replaceAllUsesWith(Call->getArgOperand(0));
-    Call->eraseFromParent();
-  }
+  for (CallInst *Call : PreserveArrayIndexCalls)
+    BPFCoreSharedInfo::removeArrayAccessCall(Call);
+  for (CallInst *Call : PreserveStructIndexCalls)
+    BPFCoreSharedInfo::removeStructAccessCall(Call);
+  for (CallInst *Call : PreserveUnionIndexCalls)
+    BPFCoreSharedInfo::removeUnionAccessCall(Call);
 
   return Found;
 }
@@ -634,12 +647,6 @@ void BPFAbstractMemberAccess::collectAICallChains(Function &F) {
     }
 }
 
-uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) {
-  const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue);
-  assert(CV);
-  return CV->getValue().getZExtValue();
-}
-
 /// Get the start and the end of storage offset for \p MemberTy.
 void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType *MemberTy,
                                                  Align RecordAlignment,

diff  --git a/llvm/lib/Target/BPF/BPFCORE.h b/llvm/lib/Target/BPF/BPFCORE.h
index 9a547a775c96024..f46a8ef62a7faa1 100644
--- a/llvm/lib/Target/BPF/BPFCORE.h
+++ b/llvm/lib/Target/BPF/BPFCORE.h
@@ -10,6 +10,7 @@
 #define LLVM_LIB_TARGET_BPF_BPFCORE_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Instructions.h"
 
 namespace llvm {
 
@@ -53,6 +54,9 @@ class BPFCoreSharedInfo {
   static Instruction *insertPassThrough(Module *M, BasicBlock *BB,
                                         Instruction *Input,
                                         Instruction *Before);
+  static void removeArrayAccessCall(CallInst *Call);
+  static void removeStructAccessCall(CallInst *Call);
+  static void removeUnionAccessCall(CallInst *Call);
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index a3616ae7ebabeff..56c89f61b3195ac 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -12,6 +12,8 @@
 // The following are done for IR adjustment:
 //   - remove __builtin_bpf_passthrough builtins. Target independent IR
 //     optimizations are done and those builtins can be removed.
+//   - remove llvm.bpf.getelementptr.and.load builtins.
+//   - remove llvm.bpf.getelementptr.and.store builtins.
 //
 //===----------------------------------------------------------------------===//
 
@@ -24,6 +26,7 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsBPF.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
@@ -51,6 +54,7 @@ class BPFCheckAndAdjustIR final : public ModulePass {
   bool removePassThroughBuiltin(Module &M);
   bool removeCompareBuiltin(Module &M);
   bool sinkMinMax(Module &M);
+  bool removeGEPBuiltins(Module &M);
 };
 } // End anonymous namespace
 
@@ -361,10 +365,62 @@ void BPFCheckAndAdjustIR::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LoopInfoWrapperPass>();
 }
 
+// Expands a getelementptr.and.load call into a GEP followed by a load, both
+// placed right before the call, then replaces the call with the load.
+static void unrollGEPLoad(CallInst *Call) {
+  auto Pair = BPFPreserveStaticOffsetPass::reconstructLoad(Call);
+  GetElementPtrInst *Addr = Pair.first;
+  LoadInst *Loaded = Pair.second;
+  Addr->insertBefore(Call);
+  Loaded->insertBefore(Call);
+  Call->replaceAllUsesWith(Loaded);
+  Call->eraseFromParent();
+}
+
+// Expands a getelementptr.and.store call into a GEP followed by a store,
+// both placed right before the call, then erases the call.
+static void unrollGEPStore(CallInst *Call) {
+  auto Pair = BPFPreserveStaticOffsetPass::reconstructStore(Call);
+  GetElementPtrInst *Addr = Pair.first;
+  StoreInst *Stored = Pair.second;
+  Addr->insertBefore(Call);
+  Stored->insertBefore(Call);
+  Call->eraseFromParent();
+}
+
+// Collects every getelementptr.and.load / getelementptr.and.store call in F
+// first and rewrites them afterwards, so that no instruction is erased while
+// the function body is being iterated. Returns true if F was modified.
+static bool removeGEPBuiltinsInFunc(Function &F) {
+  SmallVector<CallInst *> LoadCalls;
+  SmallVector<CallInst *> StoreCalls;
+  for (BasicBlock &Block : F) {
+    for (Instruction &I : Block) {
+      auto *CI = dyn_cast<CallInst>(&I);
+      Function *Callee = CI ? CI->getCalledFunction() : nullptr;
+      if (!Callee)
+        continue;
+      Intrinsic::ID ID = Callee->getIntrinsicID();
+      if (ID == Intrinsic::bpf_getelementptr_and_load)
+        LoadCalls.push_back(CI);
+      else if (ID == Intrinsic::bpf_getelementptr_and_store)
+        StoreCalls.push_back(CI);
+    }
+  }
+
+  if (LoadCalls.empty() && StoreCalls.empty())
+    return false;
+
+  for (CallInst *CI : LoadCalls)
+    unrollGEPLoad(CI);
+  for (CallInst *CI : StoreCalls)
+    unrollGEPStore(CI);
+
+  return true;
+}
+
+// Lowers the following builtins in every function of M:
+// - llvm.bpf.getelementptr.and.load
+// - llvm.bpf.getelementptr.and.store
+// to (load (getelementptr ...)) or (store (getelementptr ...)).
+// Returns true if any function was modified.
+bool BPFCheckAndAdjustIR::removeGEPBuiltins(Module &M) {
+  bool Modified = false;
+  for (Function &Fn : M)
+    Modified |= removeGEPBuiltinsInFunc(Fn);
+  return Modified;
+}
+
 bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
   bool Changed = removePassThroughBuiltin(M);
   Changed = removeCompareBuiltin(M) || Changed;
   Changed = sinkMinMax(M) || Changed;
+  Changed = removeGEPBuiltins(M) || Changed;
   return Changed;
 }
 

diff  --git a/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp b/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp
new file mode 100644
index 000000000000000..e30a10a9f2679a8
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp
@@ -0,0 +1,680 @@
+//===------ BPFPreserveStaticOffset.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TLDR: replaces llvm.preserve.static.offset + GEP + load / store
+//           with llvm.bpf.getelementptr.and.load / store
+//
+// This file implements BPFPreserveStaticOffsetPass transformation.
+// This transformation addresses two BPF verifier specific issues:
+//
+// (a) Access to the fields of some structural types is allowed only
+//     using load and store instructions with static immediate offsets.
+//
+//     Examples of such types are `struct __sk_buff` and `struct
+//     bpf_sock_ops`.  This is so because offsets of the fields of
+//     these structures do not match real offsets in the running
+//     kernel. During BPF program load LDX and STX instructions
+//     referring to the fields of these types are rewritten so that
+//     offsets match real offsets. For this rewrite to happen field
+//     offsets have to be encoded as immediate operands of the
+//     instructions.
+//
+//     See kernel/bpf/verifier.c:convert_ctx_access function in the
+//     Linux kernel source tree for details.
+//
+// (b) Pointers to context parameters of BPF programs must not be
+//     modified before access.
+//
+//     During BPF program verification a tag PTR_TO_CTX is tracked for
+//     register values. In case if register with such tag is modified
+//     BPF program is not allowed to read or write memory using this
+//     register. See kernel/bpf/verifier.c:check_mem_access function
+//     in the Linux kernel source tree for details.
+//
+// The following sequence of the IR instructions:
+//
+//   %x = getelementptr %ptr, %constant_offset
+//   %y = load %x
+//
+// Is translated as a single machine instruction:
+//
+//   LDW %ptr, %constant_offset
+//
+// In order for cases (a) and (b) to work the sequence %x-%y above has
+// to be preserved by the IR passes.
+//
+// However, several optimization passes might sink `load` instruction
+// or hoist `getelementptr` instruction so that the instructions are
+// no longer in sequence. Examples of such passes are:
+// SimplifyCFGPass, InstCombinePass, GVNPass.
+// After such modification the verifier would reject the BPF program.
+//
+// To avoid this issue the patterns like (load/store (getelementptr ...))
+// are replaced by calls to BPF specific intrinsic functions:
+// - llvm.bpf.getelementptr.and.load
+// - llvm.bpf.getelementptr.and.store
+//
+// These calls are lowered back to (load/store (getelementptr ...))
+// by BPFCheckAndAdjustIR pass right before the translation from IR to
+// machine instructions.
+//
+// The transformation is split into the following steps:
+// - When IR is generated from AST the calls to intrinsic function
+//   llvm.preserve.static.offset are inserted.
+// - BPFPreserveStaticOffsetPass is executed as early as possible
+//   with AllowPartial set to true, this handles marked GEP chains
+//   with constant offsets.
+// - BPFPreserveStaticOffsetPass is executed at ScalarOptimizerLateEPCallback
+//   with AllowPartial set to false, this handles marked GEP chains
+//   with offsets that became constant after loop unrolling, e.g.
+//   to handle the following code:
+//
+// struct context { int x[4]; } __attribute__((preserve_static_offset));
+//
+//   struct context *ctx = ...;
+// #pragma clang loop unroll(full)
+//   for (int i = 0; i < 4; ++i)
+//     foo(ctx->x[i]);
+//
+// The early BPFPreserveStaticOffsetPass run is necessary to allow
+// additional GVN / CSE opportunities after function inlining.
+// The relative order of optimization applied to function:
+// - early stage (1)
+// - ...
+// - function inlining (2)
+// - ...
+// - loop unrolling
+// - ...
+// - ScalarOptimizerLateEPCallback (3)
+//
+// When function A is inlined into function B all optimizations for A
+// are already done, while some passes remain for B. In case if
+// BPFPreserveStaticOffsetPass is done at (3) but not done at (1)
+// the code after (2) would contain a mix of
+// (load (gep %p)) and (get.and.load %p) usages:
+// - the (load (gep %p)) would come from the calling function;
+// - the (get.and.load %p) would come from the callee function.
+// Thus clobbering CSE / GVN passes done after inlining.
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define DEBUG_TYPE "bpf-preserve-static-offset"
+
+using namespace llvm;
+
+// Position of the first GEP index among the arguments of a
+// getelementptr.and.load call.
+static const unsigned GepAndLoadFirstIdxArg = 6;
+// Same for getelementptr.and.store, whose argument #0 is the stored value.
+static const unsigned GepAndStoreFirstIdxArg = 7;
+
+// Returns true if I is a call instruction whose called function is the
+// intrinsic identified by Id.
+static bool isIntrinsicCall(Value *I, Intrinsic::ID Id) {
+  auto *Call = dyn_cast<CallInst>(I);
+  if (!Call)
+    return false;
+  Function *Callee = Call->getCalledFunction();
+  return Callee && Callee->getIntrinsicID() == Id;
+}
+
+// Returns true if I is a call to the llvm.preserve.static.offset intrinsic.
+static bool isPreserveStaticOffsetCall(Value *I) {
+  return isIntrinsicCall(I, Intrinsic::preserve_static_offset);
+}
+
+// Returns I as a CallInst if it is a getelementptr.and.load call, nullptr
+// otherwise.
+static CallInst *isGEPAndLoad(Value *I) {
+  return isIntrinsicCall(I, Intrinsic::bpf_getelementptr_and_load)
+             ? cast<CallInst>(I)
+             : nullptr;
+}
+
+// Returns I as a CallInst if it is a getelementptr.and.store call, nullptr
+// otherwise.
+static CallInst *isGEPAndStore(Value *I) {
+  return isIntrinsicCall(I, Intrinsic::bpf_getelementptr_and_store)
+             ? cast<CallInst>(I)
+             : nullptr;
+}
+
+// Folds the debug locations of all instructions in Insns into a single
+// location using DILocation::getMergedLocation. Insns must not be empty.
+template <class T = Instruction>
+static DILocation *mergeDILocations(SmallVector<T *> &Insns) {
+  DILocation *Result = Insns.front()->getDebugLoc();
+  for (T *Insn : Insns)
+    Result = DILocation::getMergedLocation(Result, Insn->getDebugLoc());
+  return Result;
+}
+
+// Creates (without inserting) a call to the BPF intrinsic `Intrinsic`,
+// using the declaration obtained from M for the overload types `Types`.
+static CallInst *makeIntrinsicCall(Module *M,
+                                   Intrinsic::BPFIntrinsics Intrinsic,
+                                   ArrayRef<Type *> Types,
+                                   ArrayRef<Value *> Args) {
+  Function *Callee = Intrinsic::getDeclaration(M, Intrinsic, Types);
+  CallInst *NewCall = CallInst::Create(Callee, Args);
+  return NewCall;
+}
+
+// Attaches elementtype(Type) to parameter ArgNo of Call.
+static void setParamElementType(CallInst *Call, unsigned ArgNo, Type *Type) {
+  Attribute ElemTy =
+      Attribute::get(Call->getContext(), Attribute::ElementType, Type);
+  Call->addParamAttr(ArgNo, ElemTy);
+}
+
+// Attaches the readnone attribute to parameter ArgNo of Call.
+static void setParamReadNone(CallInst *Call, unsigned ArgNo) {
+  Attribute Attr = Attribute::get(Call->getContext(), Attribute::ReadNone);
+  Call->addParamAttr(ArgNo, Attr);
+}
+
+// Attaches the readonly attribute to parameter ArgNo of Call.
+static void setParamReadOnly(CallInst *Call, unsigned ArgNo) {
+  Attribute Attr = Attribute::get(Call->getContext(), Attribute::ReadOnly);
+  Call->addParamAttr(ArgNo, Attr);
+}
+
+// Attaches the writeonly attribute to parameter ArgNo of Call.
+static void setParamWriteOnly(CallInst *Call, unsigned ArgNo) {
+  Attribute Attr = Attribute::get(Call->getContext(), Attribute::WriteOnly);
+  Call->addParamAttr(ArgNo, Attr);
+}
+
+namespace {
+// Description of a chain of GEP instructions folded into one GEP.
+struct GEPChainInfo {
+  // True as long as every GEP folded so far is inbounds.
+  bool InBounds = true;
+  // Source element type of the folded GEP.
+  Type *SourceElementType = nullptr;
+  // Indices of the folded GEP.
+  SmallVector<Value *> Indices;
+  // The GEP instructions that make up the chain, in chain order.
+  SmallVector<GetElementPtrInst *> Members;
+
+  // Returns the object to its freshly constructed state.
+  void reset() {
+    Members.clear();
+    Indices.clear();
+    SourceElementType = nullptr;
+    InBounds = true;
+  }
+};
+} // Anonymous namespace
+
+// Appends to Args the operands shared by getelementptr.and.load and
+// getelementptr.and.store calls: pointer operand of the first GEP in the
+// chain, volatile flag, atomic ordering, sync scope id, log2 of the
+// alignment, inbounds flag and, finally, the GEP indices.
+template <class T = std::disjunction<LoadInst, StoreInst>>
+static void fillCommonArgs(LLVMContext &C, SmallVector<Value *> &Args,
+                           GEPChainInfo &GEP, T *Insn) {
+  Type *I1 = Type::getInt1Ty(C);
+  Type *I8 = Type::getInt8Ty(C);
+  // Implementation of Align guarantees that the shift value is below 64.
+  unsigned AlignLog2 = Log2_64(Insn->getAlign().value());
+  auto PushInt = [&](Type *Ty, uint64_t V) {
+    Args.push_back(ConstantInt::get(Ty, V));
+  };
+  Args.push_back(GEP.Members.front()->getPointerOperand());
+  PushInt(I1, Insn->isVolatile());
+  PushInt(I8, (unsigned)Insn->getOrdering());
+  PushInt(I8, (unsigned)Insn->getSyncScopeID());
+  PushInt(I8, AlignLog2);
+  PushInt(I1, GEP.InBounds);
+  Args.append(GEP.Indices.begin(), GEP.Indices.end());
+}
+
+// Builds (without inserting) a getelementptr.and.load call that stands for
+// Load applied to the address described by GEP. The merged debug location,
+// the AA metadata of Load and the name of the last GEP in the chain are
+// carried over to the call.
+static Instruction *makeGEPAndLoad(Module *M, GEPChainInfo &GEP,
+                                   LoadInst *Load) {
+  SmallVector<Value *> CallArgs;
+  fillCommonArgs(M->getContext(), CallArgs, GEP, Load);
+  CallInst *NewCall = makeIntrinsicCall(
+      M, Intrinsic::bpf_getelementptr_and_load, {Load->getType()}, CallArgs);
+  setParamElementType(NewCall, 0, GEP.SourceElementType);
+  NewCall->applyMergedLocation(mergeDILocations(GEP.Members),
+                               Load->getDebugLoc());
+  NewCall->setName(GEP.Members.back()->getName());
+  // Only unordered loads get the read-only / argument-memory-only attributes.
+  if (Load->isUnordered()) {
+    NewCall->setOnlyReadsMemory();
+    NewCall->setOnlyAccessesArgMemory();
+    setParamReadOnly(NewCall, 0);
+  }
+  // Every GEP index argument is marked as an immediate.
+  const unsigned NumArgs = CallArgs.size();
+  for (unsigned ArgNo = GepAndLoadFirstIdxArg; ArgNo < NumArgs; ++ArgNo)
+    NewCall->addParamAttr(ArgNo, Attribute::ImmArg);
+  NewCall->setAAMetadata(Load->getAAMetadata());
+  return NewCall;
+}
+
+// Builds (without inserting) a getelementptr.and.store call that stands for
+// Store applied to the address described by GEP. Argument #0 of the call is
+// the stored value, the common arguments follow. The merged debug location
+// and the AA metadata of Store are carried over to the call.
+static Instruction *makeGEPAndStore(Module *M, GEPChainInfo &GEP,
+                                    StoreInst *Store) {
+  Value *StoredValue = Store->getValueOperand();
+  SmallVector<Value *> CallArgs;
+  CallArgs.push_back(StoredValue);
+  fillCommonArgs(M->getContext(), CallArgs, GEP, Store);
+  CallInst *NewCall =
+      makeIntrinsicCall(M, Intrinsic::bpf_getelementptr_and_store,
+                        {StoredValue->getType()}, CallArgs);
+  setParamElementType(NewCall, 1, GEP.SourceElementType);
+  // A stored pointer is only written out, it is not accessed by the call.
+  if (StoredValue->getType()->isPointerTy())
+    setParamReadNone(NewCall, 0);
+  NewCall->applyMergedLocation(mergeDILocations(GEP.Members),
+                               Store->getDebugLoc());
+  // Only unordered stores get the write-only / argument-memory-only
+  // attributes.
+  if (Store->isUnordered()) {
+    NewCall->setOnlyWritesMemory();
+    NewCall->setOnlyAccessesArgMemory();
+    setParamWriteOnly(NewCall, 1);
+  }
+  // Every GEP index argument is marked as an immediate.
+  const unsigned NumArgs = CallArgs.size();
+  for (unsigned ArgNo = GepAndStoreFirstIdxArg; ArgNo < NumArgs; ++ArgNo)
+    NewCall->addParamAttr(ArgNo, Attribute::ImmArg);
+  NewCall->setAAMetadata(Store->getAAMetadata());
+  return NewCall;
+}
+
+// Returns the zero-extended value of operand ArgNo of Call. The operand has
+// to be a ConstantInt, otherwise a fatal error is reported.
+static unsigned getOperandAsUnsigned(CallInst *Call, unsigned ArgNo) {
+  auto *Int = dyn_cast<ConstantInt>(Call->getOperand(ArgNo));
+  if (!Int) {
+    std::string Report;
+    raw_string_ostream ReportS(Report);
+    ReportS << "Expecting ConstantInt as argument #" << ArgNo << " of "
+            << *Call << "\n";
+    report_fatal_error(StringRef(Report));
+  }
+  return Int->getValue().getZExtValue();
+}
+
+// Rebuilds (without inserting) the GEP encoded in a getelementptr.and.load
+// or getelementptr.and.store call. Delta is the position of the pointer
+// operand within the call and shifts all other operand positions
+// accordingly (callers in this file pass 0 for loads and 1 for stores).
+static GetElementPtrInst *reconstructGEP(CallInst *Call, int Delta) {
+  const unsigned FirstIdxArg = GepAndLoadFirstIdxArg + Delta;
+  SmallVector<Value *> GEPIndices;
+  GEPIndices.append(Call->data_operands_begin() + FirstIdxArg,
+                    Call->data_operands_end());
+  Type *SourceTy = Call->getParamElementType(Delta);
+  Value *BasePtr = Call->getOperand(Delta);
+  auto *NewGEP = GetElementPtrInst::Create(
+      SourceTy, BasePtr, ArrayRef<Value *>(GEPIndices), Call->getName());
+  // The inbounds flag immediately precedes the first index argument.
+  NewGEP->setIsInBounds(getOperandAsUnsigned(Call, 5 + Delta));
+  return NewGEP;
+}
+
+// Copies onto Insn (a load or a store) the properties encoded in the
+// arguments of Call: volatile flag, atomic ordering, sync scope id and
+// alignment. Debug location of Call is applied to both GEP and Insn, AA
+// metadata of Call is applied to Insn. Delta shifts the argument positions.
+template <class T = std::disjunction<LoadInst, StoreInst>>
+static void reconstructCommon(CallInst *Call, GetElementPtrInst *GEP, T *Insn,
+                              int Delta) {
+  auto Operand = [&](unsigned Idx) {
+    return getOperandAsUnsigned(Call, Idx + Delta);
+  };
+  Insn->setVolatile(Operand(1));
+  Insn->setOrdering((AtomicOrdering)Operand(2));
+  Insn->setSyncScopeID(Operand(3));
+  // The alignment is stored as a shift value.
+  unsigned AlignLog2 = Operand(4);
+  Insn->setAlignment(Align(1ULL << AlignLog2));
+  GEP->setDebugLoc(Call->getDebugLoc());
+  Insn->setDebugLoc(Call->getDebugLoc());
+  Insn->setAAMetadata(Call->getAAMetadata());
+}
+
+// Rebuilds the (GEP, load) pair encoded in a getelementptr.and.load call.
+// Neither instruction is inserted into a basic block.
+std::pair<GetElementPtrInst *, LoadInst *>
+BPFPreserveStaticOffsetPass::reconstructLoad(CallInst *Call) {
+  GetElementPtrInst *Addr = reconstructGEP(Call, 0);
+  Type *LoadedTy = Call->getFunctionType()->getReturnType();
+  // Volatile flag and alignment are placeholders, the real values are
+  // applied by reconstructCommon.
+  auto *NewLoad = new LoadInst(LoadedTy, Addr, "", false, Align(1));
+  reconstructCommon(Call, Addr, NewLoad, 0);
+  return {Addr, NewLoad};
+}
+
+// Rebuilds the (GEP, store) pair encoded in a getelementptr.and.store call;
+// operand #0 of the call is the stored value. Neither instruction is
+// inserted into a basic block.
+std::pair<GetElementPtrInst *, StoreInst *>
+BPFPreserveStaticOffsetPass::reconstructStore(CallInst *Call) {
+  GetElementPtrInst *Addr = reconstructGEP(Call, 1);
+  Value *StoredValue = Call->getOperand(0);
+  // Volatile flag and alignment are placeholders, the real values are
+  // applied by reconstructCommon.
+  auto *NewStore = new StoreInst(StoredValue, Addr, false, Align(1));
+  reconstructCommon(Call, Addr, NewStore, 1);
+  return {Addr, NewStore};
+}
+
+// Returns true if V is an integer constant equal to zero.
+static bool isZero(Value *V) {
+  if (auto *CI = dyn_cast<ConstantInt>(V))
+    return CI->isZero();
+  return false;
+}
+
+// Tries to describe a chain of GEP instructions as one GEP of the form:
+//   getelementptr %<type>, ptr %p, i32 0, <field_idx1>, <field_idx2>, ...
+// This works only when all indices are constant, when every GEP but the
+// first one starts with a zero index, and when the source element type of
+// each GEP matches the result element type of its predecessor.
+// On success Info describes the merged GEP and true is returned.
+static bool foldGEPChainAsStructAccess(SmallVector<GetElementPtrInst *> &GEPs,
+                                       GEPChainInfo &Info) {
+  if (GEPs.empty())
+    return false;
+
+  for (GetElementPtrInst *GEP : GEPs)
+    if (!GEP->hasAllConstantIndices())
+      return false;
+
+  GetElementPtrInst *Head = GEPs.front();
+  Info.InBounds = Head->isInBounds();
+  Info.SourceElementType = Head->getSourceElementType();
+  Info.Indices.append(Head->idx_begin(), Head->idx_end());
+  Info.Members.push_back(Head);
+  Type *PrevResultTy = Head->getResultElementType();
+
+  for (GetElementPtrInst *GEP : drop_begin(GEPs)) {
+    Type *SrcTy = GEP->getSourceElementType();
+    if (!isZero(*GEP->idx_begin()) || !SrcTy || SrcTy != PrevResultTy) {
+      Info.reset();
+      return false;
+    }
+    Info.InBounds &= GEP->isInBounds();
+    // The leading zero index is dropped when GEPs are glued together.
+    Info.Indices.append(GEP->idx_begin() + 1, GEP->idx_end());
+    Info.Members.push_back(GEP);
+    PrevResultTy = GEP->getResultElementType();
+  }
+
+  return true;
+}
+
+// Tries to describe a chain of GEP instructions as one GEP of the form:
+//   getelementptr i8, ptr %p, i64 %offset
+// where %offset is the sum of the constant offsets of all GEPs in the chain.
+// Fails (and resets Info) if any of the offsets is not constant.
+static bool foldGEPChainAsU8Access(SmallVector<GetElementPtrInst *> &GEPs,
+                                   GEPChainInfo &Info) {
+  if (GEPs.empty())
+    return false;
+
+  GetElementPtrInst *Head = GEPs.front();
+  const DataLayout &DL = Head->getModule()->getDataLayout();
+  LLVMContext &Ctx = Head->getContext();
+  unsigned IndexBits =
+      DL.getIndexTypeSizeInBits(Head->getType()->getScalarType());
+  APInt TotalOffset(IndexBits, 0);
+  for (GetElementPtrInst *GEP : GEPs) {
+    bool IsConstant = GEP->accumulateConstantOffset(DL, TotalOffset);
+    if (!IsConstant) {
+      Info.reset();
+      return false;
+    }
+    Info.InBounds &= GEP->isInBounds();
+    Info.Members.push_back(GEP);
+  }
+  Info.SourceElementType = Type::getInt8Ty(Ctx);
+  Info.Indices.push_back(ConstantInt::get(Ctx, TotalOffset));
+
+  return true;
+}
+
+// Emits a warning saying that Insn accesses a field of a
+// preserve_static_offset type using a non-constant offset. When Insn has no
+// debug location the message suggests to recompile with -g.
+static void reportNonStaticGEPChain(Instruction *Insn) {
+  // DiagnosticInfoUnsupported does not own its message, hence the diagnostic
+  // is built and emitted within one full-expression: this keeps every
+  // temporary Twine alive until diagnose() returns.
+  Insn->getContext().diagnose(DiagnosticInfoUnsupported(
+      *Insn->getFunction(),
+      Twine("Non-constant offset in access to a field of a type marked "
+            "with preserve_static_offset might be rejected by BPF verifier")
+          .concat(Insn->getDebugLoc()
+                      ? ""
+                      : " (pass -g option to get exact location)"),
+      Insn->getDebugLoc(), DS_Warning));
+}
+
+// Returns true if every GEP in GEPs has only zero indices; an empty list
+// counts as all-zero.
+static bool allZeroIndices(SmallVector<GetElementPtrInst *> &GEPs) {
+  for (GetElementPtrInst *GEP : GEPs)
+    if (!GEP->hasAllZeroIndices())
+      return false;
+  return true;
+}
+
+// Tries to fold GEPs into a single GEP and, on success, inserts a
+// getelementptr.and.load / getelementptr.and.store call before
+// InsnToReplace. LoadOrStoreTemplate supplies the properties of the memory
+// access. For loads the uses of InsnToReplace are redirected to the new
+// call. InsnToReplace itself is not erased here.
+// Returns false if the GEP chain can't be folded.
+static bool tryToReplaceWithGEPBuiltin(Instruction *LoadOrStoreTemplate,
+                                       SmallVector<GetElementPtrInst *> &GEPs,
+                                       Instruction *InsnToReplace) {
+  GEPChainInfo Chain;
+  bool Folded = foldGEPChainAsStructAccess(GEPs, Chain) ||
+                foldGEPChainAsU8Access(GEPs, Chain);
+  if (!Folded)
+    return false;
+
+  Module *M = InsnToReplace->getModule();
+  if (auto *Load = dyn_cast<LoadInst>(LoadOrStoreTemplate)) {
+    Instruction *NewCall = makeGEPAndLoad(M, Chain, Load);
+    NewCall->insertBefore(InsnToReplace);
+    InsnToReplace->replaceAllUsesWith(NewCall);
+  } else if (auto *Store = dyn_cast<StoreInst>(LoadOrStoreTemplate)) {
+    Instruction *NewCall = makeGEPAndStore(M, Chain, Store);
+    NewCall->insertBefore(InsnToReplace);
+  }
+  return true;
+}
+
+// Returns true if U is a load, store, GEP, getelementptr.and.load or
+// getelementptr.and.store that uses I as its pointer operand.
+static bool isPointerOperand(Value *I, User *U) {
+  if (auto *Load = dyn_cast<LoadInst>(U))
+    return I == Load->getPointerOperand();
+  if (auto *Store = dyn_cast<StoreInst>(U))
+    return I == Store->getPointerOperand();
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+    return I == GEP->getPointerOperand();
+  // The pointer is argument #0 of getelementptr.and.load ...
+  if (CallInst *LoadCall = isGEPAndLoad(U))
+    return I == LoadCall->getArgOperand(0);
+  // ... and argument #1 of getelementptr.and.store.
+  if (CallInst *StoreCall = isGEPAndStore(U))
+    return I == StoreCall->getArgOperand(1);
+  return false;
+}
+
+// Returns true if U is a call instruction that has the inlinehint function
+// attribute.
+static bool isInlineableCall(User *U) {
+  auto *Call = dyn_cast<CallInst>(U);
+  return Call && Call->hasFnAttr(Attribute::InlineHint);
+}
+
+// Forward declaration, rewriteUses and rewriteAccessChain are mutually
+// recursive.
+static void rewriteAccessChain(Instruction *Insn,
+                               SmallVector<GetElementPtrInst *> &GEPs,
+                               SmallVector<Instruction *> &Visited,
+                               bool AllowPartial, bool &StillUsed);
+
+// Visits every user of Insn. Users that consume Insn as their pointer
+// operand, nested llvm.preserve.static.offset calls and calls that have the
+// inlinehint attribute are handed over to rewriteAccessChain, any other
+// user is only mentioned in the debug output.
+static void rewriteUses(Instruction *Insn,
+                        SmallVector<GetElementPtrInst *> &GEPs,
+                        SmallVector<Instruction *> &Visited, bool AllowPartial,
+                        bool &StillUsed) {
+  for (User *U : Insn->users()) {
+    auto *UI = dyn_cast<Instruction>(U);
+    if (UI && (isPointerOperand(Insn, UI) || isPreserveStaticOffsetCall(UI) ||
+               isInlineableCall(UI)))
+      rewriteAccessChain(UI, GEPs, Visited, AllowPartial, StillUsed);
+    else
+      LLVM_DEBUG({
+        llvm::dbgs() << "unsupported usage in BPFPreserveStaticOffsetPass:\n";
+        llvm::dbgs() << "  Insn: " << *Insn << "\n";
+        llvm::dbgs() << "  User: " << *U << "\n";
+      });
+  }
+}
+
+// A DFS traversal of GEP chain trees starting from Root.
+//
+// Recursion descends through GEP instructions and
+// llvm.preserve.static.offset calls. Recursion stops at any other
+// instruction. If load or store instruction is reached it is replaced
+// by a call to `llvm.bpf.getelementptr.and.load` or
+// `llvm.bpf.getelementptr.and.store` intrinsic.
+// If `llvm.bpf.getelementptr.and.load/store` is reached the accumulated
+// GEPs are merged into the intrinsic call.
+// If nested calls to `llvm.preserve.static.offset` are encountered these
+// calls are marked for deletion.
+//
+// Parameters description:
+// - Insn - current position in the tree
+// - GEPs - GEP instructions for the current branch
+// - Visited - a list of visited instructions in DFS order,
+//   order is important for unused instruction deletion.
+// - AllowPartial - when true GEP chains that can't be folded are
+//   not reported, otherwise diagnostic message is shown for such chains.
+// - StillUsed - set to true if one of the GEP chains could not be
+//   folded, makes sense when AllowPartial is false, means that root
+//   preserve.static.offset call is still in use and should remain
+//   until the next run of this pass.
+static void rewriteAccessChain(Instruction *Insn,
+                               SmallVector<GetElementPtrInst *> &GEPs,
+                               SmallVector<Instruction *> &Visited,
+                               bool AllowPartial, bool &StillUsed) {
+  auto MarkAndTraverseUses = [&]() {
+    Visited.push_back(Insn);
+    rewriteUses(Insn, GEPs, Visited, AllowPartial, StillUsed);
+  };
+  auto TryToReplace = [&](Instruction *LoadOrStore) {
+    // Do nothing for (preserve.static.offset (load/store ..)) or for
+    // GEPs with zero indices. Such constructs lead to zero offset and
+    // are simplified by other passes.
+    if (allZeroIndices(GEPs))
+      return;
+    if (tryToReplaceWithGEPBuiltin(LoadOrStore, GEPs, Insn)) {
+      Visited.push_back(Insn);
+      return;
+    }
+    if (!AllowPartial)
+      reportNonStaticGEPChain(Insn);
+    StillUsed = true;
+  };
+  if (isa<LoadInst>(Insn) || isa<StoreInst>(Insn)) {
+    TryToReplace(Insn);
+  } else if (isGEPAndLoad(Insn)) {
+    // The GEP and the load encoded in the call are materialized as
+    // temporary, never inserted, instructions and are deleted right after
+    // the attempt to fold them with the accumulated GEPs.
+    auto [GEP, Load] =
+        BPFPreserveStaticOffsetPass::reconstructLoad(cast<CallInst>(Insn));
+    GEPs.push_back(GEP);
+    TryToReplace(Load);
+    GEPs.pop_back();
+    delete Load;
+    delete GEP;
+  } else if (isGEPAndStore(Insn)) {
+    // This  case can't be merged with the above because
+    // `delete Load` / `delete Store` wants a concrete type,
+    // destructor of Instruction is protected.
+    auto [GEP, Store] =
+        BPFPreserveStaticOffsetPass::reconstructStore(cast<CallInst>(Insn));
+    GEPs.push_back(GEP);
+    TryToReplace(Store);
+    GEPs.pop_back();
+    delete Store;
+    delete GEP;
+  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Insn)) {
+    GEPs.push_back(GEP);
+    MarkAndTraverseUses();
+    GEPs.pop_back();
+  } else if (isPreserveStaticOffsetCall(Insn)) {
+    MarkAndTraverseUses();
+  } else if (isInlineableCall(Insn)) {
+    // Preserve preserve.static.offset call for parameters of
+    // functions that might be inlined. These would be removed on a
+    // second pass after inlining.
+    // Might happen when a pointer to a preserve_static_offset
+    // structure is passed as parameter of a function that would be
+    // inlined inside a loop that would be unrolled.
+    if (AllowPartial)
+      StillUsed = true;
+  } else {
+    SmallString<128> Buf;
+    raw_svector_ostream BufStream(Buf);
+    BufStream << *Insn;
+    report_fatal_error(
+        Twine("Unexpected rewriteAccessChain Insn = ").concat(Buf));
+  }
+}
+
+// Erases Marker after redirecting all of its users to its operand #0.
+static void removeMarkerCall(Instruction *Marker) {
+  Value *MarkedPtr = Marker->getOperand(0);
+  Marker->replaceAllUsesWith(MarkedPtr);
+  Marker->eraseFromParent();
+}
+
+// Rewrites all access chains that start at Marker and then cleans up the
+// visited instructions: nested llvm.preserve.static.offset calls are
+// removed and recorded in RemovedMarkers, other visited instructions are
+// erased once they have no uses left.
+// Returns true if some chain was left unfolded, i.e. Marker is still needed.
+static bool rewriteAccessChain(Instruction *Marker, bool AllowPartial,
+                               SmallPtrSetImpl<Instruction *> &RemovedMarkers) {
+  SmallVector<GetElementPtrInst *> GEPs;
+  SmallVector<Instruction *> Visited;
+  bool StillUsed = false;
+  rewriteUses(Marker, GEPs, Visited, AllowPartial, StillUsed);
+  // Check if Visited instructions could be removed, iterate in
+  // reverse to unblock instructions higher in the chain.
+  for (Instruction *I : reverse(Visited)) {
+    if (isPreserveStaticOffsetCall(I)) {
+      removeMarkerCall(I);
+      // Only the address is recorded, the instruction itself is gone.
+      RemovedMarkers.insert(I);
+    } else if (I->use_empty()) {
+      I->eraseFromParent();
+    }
+  }
+  return StillUsed;
+}
+
+// Returns all llvm.preserve.static.offset calls of F in instruction order.
+static std::vector<Instruction *>
+collectPreserveStaticOffsetCalls(Function &F) {
+  std::vector<Instruction *> Markers;
+  for (Instruction &I : instructions(F)) {
+    if (!isPreserveStaticOffsetCall(&I))
+      continue;
+    Markers.push_back(&I);
+  }
+  return Markers;
+}
+
+// Returns true if V is a call to llvm.preserve.array.access.index.
+// File-local helper, hence static.
+static bool isPreserveArrayIndex(Value *V) {
+  return isIntrinsicCall(V, Intrinsic::preserve_array_access_index);
+}
+
+// Returns true if V is a call to llvm.preserve.struct.access.index.
+// File-local helper, hence static.
+static bool isPreserveStructIndex(Value *V) {
+  return isIntrinsicCall(V, Intrinsic::preserve_struct_access_index);
+}
+
+// Returns true if V is a call to llvm.preserve.union.access.index.
+// File-local helper, hence static.
+static bool isPreserveUnionIndex(Value *V) {
+  return isIntrinsicCall(V, Intrinsic::preserve_union_access_index);
+}
+
+// Starting from Marker, walks the users that take the visited value as
+// their pointer operand (GEPs, llvm.preserve.static.offset calls and
+// llvm.preserve.{array,struct,union}.access.index calls) and removes every
+// preserve.*.access.index call found on the way.
+static void removePAICalls(Instruction *Marker) {
+  auto IsPointerOperand = [](Value *Op, User *U) {
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+      return GEP->getPointerOperand() == Op;
+    if (isPreserveStaticOffsetCall(U) || isPreserveArrayIndex(U) ||
+        isPreserveStructIndex(U) || isPreserveUnionIndex(U))
+      return cast<CallInst>(U)->getArgOperand(0) == Op;
+    return false;
+  };
+
+  SmallVector<Value *, 32> WorkList;
+  WorkList.push_back(Marker);
+  do {
+    Value *V = WorkList.pop_back_val();
+    // Users are queued before V is possibly erased below.
+    for (User *U : V->users())
+      if (IsPointerOperand(V, U))
+        WorkList.push_back(U);
+    auto *Call = dyn_cast<CallInst>(V);
+    if (!Call)
+      continue;
+    // Each remove*AccessCall helper erases Call, so V must not be inspected
+    // again once one of them has run: the checks form an else-if chain.
+    if (isPreserveArrayIndex(V))
+      BPFCoreSharedInfo::removeArrayAccessCall(Call);
+    else if (isPreserveStructIndex(V))
+      BPFCoreSharedInfo::removeStructAccessCall(Call);
+    else if (isPreserveUnionIndex(V))
+      BPFCoreSharedInfo::removeUnionAccessCall(Call);
+  } while (!WorkList.empty());
+}
+
+// Searches F for the following sequences:
+// - llvm.preserve.static.offset -> getelementptr... -> load
+// - llvm.preserve.static.offset -> getelementptr... -> store
+// and replaces them with calls to the intrinsics:
+// - llvm.bpf.getelementptr.and.load
+// - llvm.bpf.getelementptr.and.store
+// Returns true if F contained at least one llvm.preserve.static.offset call.
+static bool rewriteFunction(Function &F, bool AllowPartial) {
+  LLVM_DEBUG(dbgs() << "********** BPFPreserveStaticOffsetPass (AllowPartial="
+                    << AllowPartial << ") ************\n");
+
+  std::vector<Instruction *> Markers = collectPreserveStaticOffsetCalls(F);
+  SmallPtrSet<Instruction *, 16> RemovedMarkers;
+
+  LLVM_DEBUG(dbgs() << "There are " << Markers.size()
+                    << " preserve.static.offset calls\n");
+
+  if (Markers.empty())
+    return false;
+
+  // preserve.*.access.index calls are stripped from all marked chains
+  // before any of the chains is rewritten.
+  for (Instruction *Marker : Markers)
+    removePAICalls(Marker);
+
+  for (Instruction *Marker : Markers) {
+    // Nested markers might have been removed while an outer one was handled.
+    if (RemovedMarkers.contains(Marker))
+      continue;
+    bool StillUsed = rewriteAccessChain(Marker, AllowPartial, RemovedMarkers);
+    // A marker survives only a partial run that could not fold all chains.
+    if (StillUsed && AllowPartial)
+      continue;
+    removeMarkerCall(Marker);
+  }
+
+  return true;
+}
+
+// New pass manager entry point: no analysis is preserved when the function
+// was rewritten, all analyses are preserved otherwise.
+PreservedAnalyses
+llvm::BPFPreserveStaticOffsetPass::run(Function &F,
+                                       FunctionAnalysisManager &AM) {
+  if (!rewriteFunction(F, AllowPartial))
+    return PreservedAnalyses::all();
+  return PreservedAnalyses::none();
+}

diff  --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 983a4ff6aa5c761..65286c822c4b58a 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -105,11 +105,16 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
           FPM.addPass(BPFIRPeepholePass());
           return true;
         }
+        if (PassName == "bpf-preserve-static-offset") {
+          FPM.addPass(BPFPreserveStaticOffsetPass(false));
+          return true;
+        }
         return false;
       });
   PB.registerPipelineStartEPCallback(
       [=](ModulePassManager &MPM, OptimizationLevel) {
         FunctionPassManager FPM;
+        FPM.addPass(BPFPreserveStaticOffsetPass(true));
         FPM.addPass(BPFAbstractMemberAccessPass(this));
         FPM.addPass(BPFPreserveDITypePass());
         FPM.addPass(BPFIRPeepholePass());
@@ -119,6 +124,12 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
                                     OptimizationLevel Level) {
     FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
   });
+  PB.registerScalarOptimizerLateEPCallback(
+      [=](FunctionPassManager &FPM, OptimizationLevel Level) {
+        // Run this after loop unrolling but before
+        // SimplifyCFGPass(... .sinkCommonInsts(true))
+        FPM.addPass(BPFPreserveStaticOffsetPass(false));
+      });
   PB.registerPipelineEarlySimplificationEPCallback(
       [=](ModulePassManager &MPM, OptimizationLevel) {
         MPM.addPass(BPFAdjustOptPass());

diff  --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index f4a8fa3674cd3e4..6a96394a6aee3e5 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_target(BPFCodeGen
   BPFISelLowering.cpp
   BPFMCInstLower.cpp
   BPFPreserveDIType.cpp
+  BPFPreserveStaticOffset.cpp
   BPFRegisterInfo.cpp
   BPFSelectionDAGInfo.cpp
   BPFSubtarget.cpp

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-align.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-align.ll
new file mode 100644
index 000000000000000..0a0f3c22e3740bb
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-align.ll
@@ -0,0 +1,66 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check handling of a load instruction for a field with non-standard
+; alignment by bpf-preserve-static-offset.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    typedef int aligned_int __attribute__((aligned(128)));
+;    
+;    struct foo {
+;      int _;
+;      aligned_int a;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p) {
+;      consume(p->a);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, [124 x i8], i32, [124 x i8] }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 2
+  %1 = load i32, ptr %a, align 128, !tbaa !2
+  call void @consume(i32 noundef %1)
+  ret void
+}
+
+; CHECK:      %[[a1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:      (ptr readonly elementtype(%struct.foo) %{{[^,]+}},
+; CHECK-SAME:       i1 false, i8 0, i8 1, i8 7, i1 true, i32 immarg 0, i32 immarg 2)
+;                                         ^^^^
+;                                     alignment 2**7
+; CHECK-SAME:         #[[v2:.*]], !tbaa
+; CHECK-NEXT: call void @consume(i32 noundef %[[a1]])
+; CHECK:      attributes #[[v2]] = { memory(argmem: read) }
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 128}
+!3 = !{!"foo", !4, i64 0, !4, i64 128}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-arr-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-arr-pai.ll
new file mode 100644
index 000000000000000..d6b1f30fa386746
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-arr-pai.ll
@@ -0,0 +1,93 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    #define __pai __attribute__((preserve_access_index))
+;    
+;    struct bar {
+;      int a[7];
+;    } __pai __ctx;
+;    
+;    int buz(struct bar *p) {
+;      return p->a[5];
+;    }
+;    
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -debug-info-kind=limited -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.bar = type { [7 x i32] }
+
+; Function Attrs: nounwind
+define dso_local i32 @buz(ptr noundef %p) #0 !dbg !10 {
+entry:
+  call void @llvm.dbg.value(metadata ptr %p, metadata !18, metadata !DIExpression()), !dbg !19
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p), !dbg !20
+  %1 = call ptr @llvm.preserve.struct.access.index.p0.p0(ptr elementtype(%struct.bar) %0, i32 0, i32 0), !dbg !20, !llvm.preserve.access.index !14
+  %2 = call ptr @llvm.preserve.array.access.index.p0.p0(ptr elementtype([7 x i32]) %1, i32 1, i32 5), !dbg !21, !llvm.preserve.access.index !3
+  %3 = load i32, ptr %2, align 4, !dbg !21, !tbaa !22
+  ret i32 %3, !dbg !26
+}
+
+; CHECK:      define dso_local i32 @buz(ptr noundef %[[p:.*]]) {{.*}} {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.dbg.value
+; CHECK-NEXT:   %[[v5:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:       (ptr readonly elementtype(%struct.bar) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 0, i32 immarg 5)
+; CHECK-SAME:      #[[v6:.*]], !tbaa
+; CHECK-NEXT:   ret i32 %[[v5]]
+; CHECK-NEXT: }
+; CHECK:      attributes #[[v6]] = { memory(argmem: read) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.struct.access.index.p0.p0(ptr, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.array.access.index.p0.p0(ptr, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "some-file.c", directory: "/some/dir/")
+!2 = !{!3}
+!3 = !DICompositeType(tag: DW_TAG_array_type, baseType: !4, size: 224, elements: !5)
+!4 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!5 = !{!6}
+!6 = !DISubrange(count: 7)
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i32 1, !"wchar_size", i32 4}
+!9 = !{!"clang"}
+!10 = distinct !DISubprogram(name: "buz", scope: !1, file: !1, line: 8, type: !11, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !17)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!4, !13}
+!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64)
+!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "bar", file: !1, line: 4, size: 224, elements: !15)
+!15 = !{!16}
+!16 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !14, file: !1, line: 5, baseType: !3, size: 224)
+!17 = !{!18}
+!18 = !DILocalVariable(name: "p", arg: 1, scope: !10, file: !1, line: 8, type: !13)
+!19 = !DILocation(line: 0, scope: !10)
+!20 = !DILocation(line: 9, column: 13, scope: !10)
+!21 = !DILocation(line: 9, column: 10, scope: !10)
+!22 = !{!23, !23, i64 0}
+!23 = !{!"int", !24, i64 0}
+!24 = !{!"omnipotent char", !25, i64 0}
+!25 = !{!"Simple C/C++ TBAA"}
+!26 = !DILocation(line: 9, column: 3, scope: !10)

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-atomic.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-atomic.ll
new file mode 100644
index 000000000000000..0a0c8ce9af5f420
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-atomic.ll
@@ -0,0 +1,66 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check handling of atomic load instruction by bpf-preserve-static-offset.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int _;
+;      int a;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p) {
+;      int r;
+;      __atomic_load(&p->a, &r, 2);
+;      consume(r);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  %1 = load atomic i32, ptr %a acquire, align 4
+  call void @consume(i32 noundef %1)
+  ret void
+}
+
+; CHECK:      %[[a1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:      (ptr elementtype(%struct.foo) %{{[^,]+}},
+; CHECK-SAME:       i1 false, i8 4, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+;                             ^^^^
+;                          atomic order
+; CHECK-NOT:  #{{[0-9]+}}
+; CHECK-NEXT: call void @consume(i32 noundef %[[a1]])
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+declare void @consume(i32 noundef) #3
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-2.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-2.ll
new file mode 100644
index 000000000000000..4832fb2a50c0a4d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-2.ll
@@ -0,0 +1,82 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds GEP chains that end with a
+; getelementptr.and.load call.
+;
+; Source (modified by hand):
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p) {
+;      consume(p->b.bb);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+;
+; And modified to fold last getelementptr/load as a single
+; getelementptr.and.load.
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  %bb1 = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+           @llvm.bpf.getelementptr.and.load.i32
+             (ptr readonly elementtype(%struct.bar) %b,
+              i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+           #4, !tbaa !2
+  call void @consume(i32 noundef %bb1)
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr noundef %[[p:.*]])
+; CHECK:        %[[bb1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:       (ptr readonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1, i32 immarg 1)
+; CHECK-SAME:      #[[v2:.*]], !tbaa
+; CHECK-NEXT:   call void @consume(i32 noundef %[[bb1]])
+; CHECK:      attributes #[[v2]] = { memory(argmem: read) }
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare i32 @llvm.bpf.getelementptr.and.load.i32(ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #3
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nounwind willreturn }
+attributes #4 = { memory(argmem: read) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 8}
+!3 = !{!"foo", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"bar", !4, i64 0, !4, i64 4}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-oob.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-oob.ll
new file mode 100644
index 000000000000000..c4a92481a0b16a8
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-oob.ll
@@ -0,0 +1,73 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset keeps track of 'inbounds' flags while
+; folding chain of GEP instructions.
+;
+; Source (IR modified by hand):
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a[2];
+;    };
+;    
+;    struct bar {
+;      int a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void buz(struct bar *p) {
+;      consume(p->b.a[1]);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+;
+; Modified to remove 'inbounds' from one of the GEP instructions.
+
+%struct.bar = type { i32, %struct.foo }
+%struct.foo = type { [2 x i32] }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.bar, ptr %0, i32 0, i32 1
+  %a = getelementptr %struct.foo, ptr %b, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
+  %1 = load i32, ptr %arrayidx, align 4, !tbaa !2
+  call void @consume(i32 noundef %1)
+  ret void
+}
+
+; CHECK:      %[[v1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:      (ptr readonly elementtype(%struct.bar) %{{[^,]+}},
+; CHECK-SAME:       i1 false, i8 0, i8 1, i8 2, i1 false,
+;                                               ^^^^^^^^
+;                                             not inbounds
+; CHECK-SAME:       i32 immarg 0, i32 immarg 1, i32 immarg 0, i64 immarg 1)
+;                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+;                                         folded gep chain
+; CHECK-NEXT: call void @consume(i32 noundef %[[v1]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8-oob.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8-oob.ll
new file mode 100644
index 000000000000000..da3e01a455a5df9
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8-oob.ll
@@ -0,0 +1,74 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds chain of GEP instructions.
+; The GEP chain in this example has an unexpected shape and thus is
+; folded as an i8 access.
+;
+; Source (IR modified by hand):
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      char a[2];
+;    };
+;    
+;    struct bar {
+;      char a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    extern void consume(char);
+;    
+;    void buz(struct bar *p) {
+;      consume((&p->b)[1].a[1]);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+;
+; Modified to remove 'inbounds' from one of the GEP instructions.
+
+%struct.bar = type { i8, %struct.foo }
+%struct.foo = type { [2 x i8] }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.bar, ptr %0, i32 0, i32 1
+  %arrayidx = getelementptr inbounds %struct.foo, ptr %b, i64 1
+;                                                         ^^^^^
+;                                  folded as i8 access because of this index
+  %a = getelementptr %struct.foo, ptr %arrayidx, i32 0, i32 0
+  %arrayidx1 = getelementptr inbounds [2 x i8], ptr %a, i64 0, i64 1
+  %1 = load i8, ptr %arrayidx1, align 1, !tbaa !2
+  call void @consume(i8 noundef signext %1)
+  ret void
+}
+
+; CHECK:      %[[v1:.*]] = call i8 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i8
+; CHECK-SAME:      (ptr readonly elementtype(i8) %{{[^,]+}},
+; CHECK-SAME:       i1 false, i8 0, i8 1, i8 0, i1 false, i64 immarg 4)
+;                                               ^^^^^^^^  ^^^^^^^^^^^^
+;                                 not inbounds ---'         |
+;               offset from 'struct bar' start -------------'
+; CHECK-NEXT: call void @consume(i8 noundef signext %[[v1]])
+
+declare void @consume(i8 noundef signext) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8-type-mismatch.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8-type-mismatch.ll
new file mode 100644
index 000000000000000..757e06c507c698d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8-type-mismatch.ll
@@ -0,0 +1,73 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds chain of GEP instructions.
+; The GEP chain in this example has an unexpected shape and thus is
+; folded as an i8 access.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      char aa;
+;      char bb;
+;    };
+;    
+;    struct bar {
+;      char a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    extern void consume(char);
+;    
+;    void buz(struct bar *p) {
+;      consume(((struct foo *)(((char*)&p->b) + 1))->bb);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.bar = type { i8, %struct.foo }
+%struct.foo = type { i8, i8 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.bar, ptr %0, i32 0, i32 1
+  %add.ptr = getelementptr inbounds i8, ptr %b, i64 1
+;                                   ~~
+;         these types do not match, thus GEP chain is folded as an offset
+;                              ~~~~~~~~~~~
+  %bb = getelementptr inbounds %struct.foo, ptr %add.ptr, i32 0, i32 1
+  %1 = load i8, ptr %bb, align 1, !tbaa !2
+  call void @consume(i8 noundef signext %1)
+  ret void
+}
+
+; CHECK:      %[[bb1:.*]] = call i8 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:   @llvm.bpf.getelementptr.and.load.i8
+; CHECK-SAME:     (ptr readonly elementtype(i8) %{{[^,]+}},
+; CHECK-SAME:      i1 false, i8 0, i8 1, i8 0, i1 true, i64 immarg 3)
+;                                                       ^^^^^^^^^^^^
+;                                      offset from 'struct bar' start
+; CHECK-NEXT: call void @consume(i8 noundef signext %[[bb1]])
+
+declare void @consume(i8 noundef signext) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 1}
+!3 = !{!"foo", !4, i64 0, !4, i64 1}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8.ll
new file mode 100644
index 000000000000000..e91aa93775e1daa
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain-u8.ll
@@ -0,0 +1,71 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds chain of GEP instructions.
+; The GEP chain in this example has an unexpected shape and thus is
+; folded as an i8 access.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      char a[2];
+;    };
+;    
+;    struct bar {
+;      char a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    extern void consume(char);
+;    
+;    void buz(struct bar *p) {
+;      consume((&p->b)[1].a[1]);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.bar = type { i8, %struct.foo }
+%struct.foo = type { [2 x i8] }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.bar, ptr %0, i32 0, i32 1
+  %arrayidx = getelementptr inbounds %struct.foo, ptr %b, i64 1
+;                                                         ^^^^^
+;                                  folded as i8 access because of this index
+  %a = getelementptr inbounds %struct.foo, ptr %arrayidx, i32 0, i32 0
+  %arrayidx1 = getelementptr inbounds [2 x i8], ptr %a, i64 0, i64 1
+  %1 = load i8, ptr %arrayidx1, align 1, !tbaa !2
+  call void @consume(i8 noundef signext %1)
+  ret void
+}
+
+; CHECK:      %[[v1:.*]] = call i8 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i8
+; CHECK-SAME:      (ptr readonly elementtype(i8) %{{[^,]+}},
+; CHECK-SAME:       i1 false, i8 0, i8 1, i8 0, i1 true, i64 immarg 4)
+;                                                        ^^^^^^^^^^^^
+;                                       offset from 'struct bar' start
+; CHECK-NEXT: call void @consume(i8 noundef signext %[[v1]])
+
+declare void @consume(i8 noundef signext) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain.ll
new file mode 100644
index 000000000000000..ac08fed70c8a4e0
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-chain.ll
@@ -0,0 +1,68 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds chain of GEP instructions.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a[2];
+;    };
+;    
+;    struct bar {
+;      int a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void buz(struct bar *p) {
+;      consume(p->b.a[1]);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.bar = type { i32, %struct.foo }
+%struct.foo = type { [2 x i32] }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.bar, ptr %0, i32 0, i32 1
+  %a = getelementptr inbounds %struct.foo, ptr %b, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
+  %1 = load i32, ptr %arrayidx, align 4, !tbaa !2
+  call void @consume(i32 noundef %1)
+  ret void
+}
+
+; CHECK:      %[[v1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:      (ptr readonly elementtype(%struct.bar) %{{[^,]+}},
+; CHECK-SAME:       i1 false, i8 0, i8 1, i8 2, i1 true,
+; CHECK-SAME:       i32 immarg 0, i32 immarg 1, i32 immarg 0, i64 immarg 1)
+;                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+;                                      folded gep chain
+; CHECK-NEXT: call void @consume(i32 noundef %[[v1]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-inline.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-inline.ll
new file mode 100644
index 000000000000000..9149b350dd89563
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-inline.ll
@@ -0,0 +1,85 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check position of bpf-preserve-static-offset pass in the pipeline:
+; - preserve.static.offset call is preserved if address is passed as
+;   a parameter to an inline-able function;
+; - second bpf-preserve-static-offset pass (after inlining) should introduce
+;   getelementptr.and.load call using the preserved marker.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    static inline void bar(struct bar *p){
+;      consume(p->bb);
+;    }
+;    
+;    void quux(struct foo *p) {
+;      bar(&p->b);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @quux(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  call void @bar(ptr noundef %b)
+  ret void
+}
+
+; Function Attrs: inlinehint nounwind
+define internal void @bar(ptr noundef %p) #1 {
+entry:
+  %bb = getelementptr inbounds %struct.bar, ptr %p, i32 0, i32 1
+  %0 = load i32, ptr %bb, align 4, !tbaa !2
+  call void @consume(i32 noundef %0)
+  ret void
+}
+
+; CHECK:      define dso_local void @quux(ptr nocapture noundef readonly %[[p:.*]])
+; CHECK:        %[[bb_i1:.*]] = tail call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:       (ptr readonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i64 immarg 0, i32 immarg 1, i32 immarg 1)
+; CHECK-SAME:      #[[v2:.*]], !tbaa
+; CHECK-NEXT:   tail call void @consume(i32 noundef %[[bb_i1]])
+; CHECK:      attributes #[[v2]] = { memory(argmem: read) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+declare void @consume(i32 noundef) #3
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { inlinehint nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"bar", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-non-const.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-non-const.ll
new file mode 100644
index 000000000000000..2dd6edf4c4b8a1b
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-non-const.ll
@@ -0,0 +1,75 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s 2>&1 | FileCheck %s
+;
+; If the load offset is not a constant, bpf-preserve-static-offset should
+; report a warning and remove the preserve.static.offset call.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a[7];
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p, unsigned long i) {
+;      consume(p->a[i]);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -debug-info-kind=line-tables-only -triple bpf \
+;         -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+; CHECK:      warning: some-file.c:10:11: in function bar void (ptr, i64):
+; CHECK-SAME: Non-constant offset in access to a field of a type marked with
+; CHECK-SAME: preserve_static_offset might be rejected by BPF verifier
+
+%struct.foo = type { [7 x i32] }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p, i64 noundef %i) #0 !dbg !5 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p), !dbg !8
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 0, !dbg !8
+  %arrayidx = getelementptr inbounds [7 x i32], ptr %a, i64 0, i64 %i, !dbg !9
+  %1 = load i32, ptr %arrayidx, align 4, !dbg !9, !tbaa !10
+  call void @consume(i32 noundef %1), !dbg !14
+  ret void, !dbg !15
+}
+
+; CHECK:      define dso_local void @bar(ptr noundef %[[p:.*]], i64 noundef %[[i:.*]])
+; CHECK:        %[[a:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 0, !dbg
+; CHECK-NEXT:   %[[arrayidx:.*]] = getelementptr inbounds [7 x i32], ptr %[[a]], i64 0, i64 %[[i]], !dbg
+; CHECK-NEXT:   %[[v5:.*]] = load i32, ptr %[[arrayidx]], align 4, !dbg {{.*}}, !tbaa
+; CHECK-NEXT:   call void @consume(i32 noundef %[[v5]]), !dbg
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "some-file.c", directory: "/some/dir/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 1, !"wchar_size", i32 4}
+!4 = !{!"clang"}
+!5 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 9, type: !6, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !DILocation(line: 10, column: 14, scope: !5)
+!9 = !DILocation(line: 10, column: 11, scope: !5)
+!10 = !{!11, !11, i64 0}
+!11 = !{!"int", !12, i64 0}
+!12 = !{!"omnipotent char", !13, i64 0}
+!13 = !{!"Simple C/C++ TBAA"}
+!14 = !DILocation(line: 10, column: 3, scope: !5)
+!15 = !DILocation(line: 11, column: 1, scope: !5)

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-ptr-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-ptr-pai.ll
new file mode 100644
index 000000000000000..6ec59c6b2c02477
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-ptr-pai.ll
@@ -0,0 +1,114 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    #define __pai __attribute__((preserve_access_index))
+;    
+;    struct bar {
+;      int a;
+;      int b;
+;    } __pai;
+;    
+;    struct buz {
+;      int _1;
+;      struct bar *b;
+;    } __pai __ctx;
+;    
+;    void foo(struct buz *p) {
+;      p->b->b = 42;
+;    }
+;    
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes \
+;         -debug-info-kind=limited -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.buz = type { i32, ptr }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @foo(ptr noundef %p) #0 !dbg !5 {
+entry:
+  call void @llvm.dbg.value(metadata ptr %p, metadata !20, metadata !DIExpression()), !dbg !21
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p), !dbg !22
+  %1 = call ptr @llvm.preserve.struct.access.index.p0.p0(ptr elementtype(%struct.buz) %0, i32 1, i32 1), !dbg !22, !llvm.preserve.access.index !9
+  %2 = load ptr, ptr %1, align 8, !dbg !22, !tbaa !23
+  %3 = call ptr @llvm.preserve.struct.access.index.p0.p0(ptr elementtype(%struct.bar) %2, i32 1, i32 1), !dbg !29, !llvm.preserve.access.index !15
+  store i32 42, ptr %3, align 4, !dbg !30, !tbaa !31
+  ret void, !dbg !33
+}
+
+; CHECK:      define dso_local void @foo(ptr noundef %[[p:.*]]) {{.*}} {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.dbg.value
+; CHECK-NEXT:   %[[v5:.*]] = call ptr (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.p0
+; CHECK-SAME:       (ptr readonly elementtype(%struct.buz) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 3, i1 true, i32 immarg 0, i32 immarg 1)
+; CHECK-SAME:      #[[v6:.*]], !tbaa
+; CHECK-NEXT:   %[[v8:.*]] =
+; CHECK-SAME:     call ptr @llvm.preserve.struct.access.index.p0.p0
+; CHECK-SAME:       (ptr elementtype(%struct.bar) %[[v5]], i32 1, i32 1),
+; CHECK-SAME:        !dbg ![[#]], !llvm.preserve.access.index ![[#]]
+; CHECK-NEXT:   store i32 42, ptr %[[v8]], align 4, !dbg ![[#]], !tbaa
+; CHECK-NEXT:   ret void, !dbg
+; CHECK-NEXT: }
+
+; CHECK:      attributes #[[v6]] = { memory(argmem: read) }
+
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.struct.access.index.p0.p0(ptr, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "some-file.c", directory: "/some/dir/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 1, !"wchar_size", i32 4}
+!4 = !{!"clang"}
+!5 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 14, type: !6, scopeLine: 14, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !19)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !8}
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
+!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "buz", file: !1, line: 9, size: 128, elements: !10)
+!10 = !{!11, !13}
+!11 = !DIDerivedType(tag: DW_TAG_member, name: "_1", scope: !9, file: !1, line: 10, baseType: !12, size: 32)
+!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !9, file: !1, line: 11, baseType: !14, size: 64, offset: 64)
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64)
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "bar", file: !1, line: 4, size: 64, elements: !16)
+!16 = !{!17, !18}
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !15, file: !1, line: 5, baseType: !12, size: 32)
+!18 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !15, file: !1, line: 6, baseType: !12, size: 32, offset: 32)
+!19 = !{!20}
+!20 = !DILocalVariable(name: "p", arg: 1, scope: !5, file: !1, line: 14, type: !8)
+!21 = !DILocation(line: 0, scope: !5)
+!22 = !DILocation(line: 15, column: 6, scope: !5)
+!23 = !{!24, !28, i64 8}
+!24 = !{!"buz", !25, i64 0, !28, i64 8}
+!25 = !{!"int", !26, i64 0}
+!26 = !{!"omnipotent char", !27, i64 0}
+!27 = !{!"Simple C/C++ TBAA"}
+!28 = !{!"any pointer", !26, i64 0}
+!29 = !DILocation(line: 15, column: 9, scope: !5)
+!30 = !DILocation(line: 15, column: 11, scope: !5)
+!31 = !{!32, !25, i64 4}
+!32 = !{!"bar", !25, i64 0, !25, i64 4}
+!33 = !DILocation(line: 16, column: 1, scope: !5)

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-simple.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-simple.ll
new file mode 100644
index 000000000000000..03ae7f3272dcf52
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-simple.ll
@@ -0,0 +1,71 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check handling of a simple load instruction by bpf-preserve-static-offset.
+; Verify:
+; - presence of gep.and.load intrinsic call
+; - correct attributes for intrinsic call
+; - presence of tbaa annotations
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int _;
+;      int a;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p) {
+;      consume(p->a);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  %1 = load i32, ptr %a, align 4, !tbaa !2
+  call void @consume(i32 noundef %1)
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr noundef %[[p:.*]])
+; CHECK:      %[[a1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:      (ptr readonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:       i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+; CHECK-SAME:         #[[v1:.*]], !tbaa
+; CHECK-NEXT: call void @consume(i32 noundef %[[a1]])
+
+; CHECK:      declare i32
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i32(ptr nocapture, {{.*}}) #[[v2:.*]]
+
+; CHECK:      attributes #[[v2]] = { nocallback nofree nounwind willreturn }
+; CHECK:      attributes #[[v1]] = { memory(argmem: read) }
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-struct-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-struct-pai.ll
new file mode 100644
index 000000000000000..5baa7ad0242cfe7
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-struct-pai.ll
@@ -0,0 +1,105 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    #define __pai __attribute__((preserve_access_index))
+;    
+;    struct foo {
+;      int a;
+;      int b;
+;    };
+;    
+;    struct bar {
+;      int _1;
+;      int _2;
+;      struct foo c;
+;    } __pai __ctx;
+;    
+;    int buz(struct bar *p) {
+;      return p->c.b;
+;    }
+;    
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes \
+;         -debug-info-kind=limited -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.bar = type { i32, i32, %struct.foo }
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local i32 @buz(ptr noundef %p) #0 !dbg !5 {
+entry:
+  call void @llvm.dbg.value(metadata ptr %p, metadata !20, metadata !DIExpression()), !dbg !21
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p), !dbg !22
+  %1 = call ptr @llvm.preserve.struct.access.index.p0.p0(ptr elementtype(%struct.bar) %0, i32 2, i32 2), !dbg !22, !llvm.preserve.access.index !10
+  %b = getelementptr inbounds %struct.foo, ptr %1, i32 0, i32 1, !dbg !23
+  %2 = load i32, ptr %b, align 4, !dbg !23, !tbaa !24
+  ret i32 %2, !dbg !30
+}
+
+; CHECK:      define dso_local i32 @buz(ptr noundef %[[p:.*]]) {{.*}} {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.dbg.value
+; CHECK-NEXT:   %[[b1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:       (ptr readonly elementtype(%struct.bar) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 2, i32 immarg 1)
+; CHECK-SAME:      #[[v5:.*]], !tbaa
+; CHECK-NEXT:   ret i32 %[[b1]]
+; CHECK-NEXT: }
+
+; CHECK:      attributes #[[v5]] = { memory(argmem: read) }
+
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.struct.access.index.p0.p0(ptr, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "some-file.c", directory: "/some/dir/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 1, !"wchar_size", i32 4}
+!4 = !{!"clang"}
+!5 = distinct !DISubprogram(name: "buz", scope: !1, file: !1, line: 15, type: !6, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !19)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !9}
+!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
+!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "bar", file: !1, line: 9, size: 128, elements: !11)
+!11 = !{!12, !13, !14}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "_1", scope: !10, file: !1, line: 10, baseType: !8, size: 32)
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "_2", scope: !10, file: !1, line: 11, baseType: !8, size: 32, offset: 32)
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !10, file: !1, line: 12, baseType: !15, size: 64, offset: 64)
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 4, size: 64, elements: !16)
+!16 = !{!17, !18}
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !15, file: !1, line: 5, baseType: !8, size: 32)
+!18 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !15, file: !1, line: 6, baseType: !8, size: 32, offset: 32)
+!19 = !{!20}
+!20 = !DILocalVariable(name: "p", arg: 1, scope: !5, file: !1, line: 15, type: !9)
+!21 = !DILocation(line: 0, scope: !5)
+!22 = !DILocation(line: 16, column: 13, scope: !5)
+!23 = !DILocation(line: 16, column: 15, scope: !5)
+!24 = !{!25, !26, i64 12}
+!25 = !{!"bar", !26, i64 0, !26, i64 4, !29, i64 8}
+!26 = !{!"int", !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C/C++ TBAA"}
+!29 = !{!"foo", !26, i64 0, !26, i64 4}
+!30 = !DILocation(line: 16, column: 3, scope: !5)

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-align.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-align.ll
new file mode 100644
index 000000000000000..019c93c424b1938
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-align.ll
@@ -0,0 +1,67 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that getelementptr.and.load unroll restores alignment spec.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    typedef int aligned_int __attribute__((aligned(128)));
+;    
+;    struct foo {
+;      int _;
+;      aligned_int a;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p) {
+;      consume(p->a);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, [124 x i8], i32, [124 x i8] }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %a1 = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.load.i32
+      (ptr readonly elementtype(%struct.foo) %p,
+       i1 false, i8 0, i8 1, i8 7, i1 true, i32 immarg 0, i32 immarg 2)
+    #4, !tbaa !2
+  call void @consume(i32 noundef %a1)
+  ret void
+}
+
+; CHECK: define dso_local void @bar(ptr noundef %[[p:.*]])
+; CHECK:   %[[a11:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 2
+; CHECK:   %[[v2:.*]] = load i32, ptr %[[a11]], align 128
+; CHECK:   call void @consume(i32 noundef %[[v2]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare i32 @llvm.bpf.getelementptr.and.load.i32(ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #3
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nounwind willreturn }
+attributes #4 = { memory(argmem: read) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 128}
+!3 = !{!"foo", !4, i64 0, !4, i64 128}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain-oob.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain-oob.ll
new file mode 100644
index 000000000000000..d8fa3482b6cc02d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain-oob.ll
@@ -0,0 +1,74 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that getelementptr.and.load unroll omits 'inbounds' when the flag is false.
+;
+; Source (IR modified by hand):
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void buz(struct foo *p) {
+;      consume(p->b.bb);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+;
+; Modified to set 'inbounds' flag to false.
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %bb1 = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.load.i32
+      (ptr readonly elementtype(%struct.foo) %p,
+       i1 false, i8 0, i8 1, i8 2, i1 false, i32 immarg 0, i32 immarg 1, i32 immarg 1)
+    #4, !tbaa !2
+  call void @consume(i32 noundef %bb1)
+  ret void
+}
+
+; CHECK: define dso_local void @buz(ptr noundef %[[p:.*]])
+; CHECK:   %[[bb11:.*]] = getelementptr %struct.foo, ptr %[[p]], i32 0, i32 1, i32 1
+; CHECK:   %[[v2:.*]] = load i32, ptr %[[bb11]], align 4
+; CHECK:   call void @consume(i32 noundef %[[v2]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare i32 @llvm.bpf.getelementptr.and.load.i32(ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #3
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nounwind willreturn }
+attributes #4 = { memory(argmem: read) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 8}
+!3 = !{!"foo", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"bar", !4, i64 0, !4, i64 4}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain-u8.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain-u8.ll
new file mode 100644
index 000000000000000..ac6f830bf5d4d35
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain-u8.ll
@@ -0,0 +1,68 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check unroll of getelementptr.and.load when direct memory offset is
+; used instead of field indexes.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      char aa;
+;      char bb;
+;    };
+;    
+;    struct bar {
+;      char a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    extern void consume(char);
+;    
+;    void buz(struct bar *p) {
+;      consume(((struct foo *)(((char*)&p->b) + 1))->bb);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %bb1 = call i8 (ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.load.i8
+      (ptr readonly elementtype(i8) %p,
+       i1 false, i8 0, i8 1, i8 0, i1 true, i64 immarg 3)
+    #4, !tbaa !2
+  call void @consume(i8 noundef signext %bb1)
+  ret void
+}
+
+; CHECK: define dso_local void @buz(ptr noundef %[[p:.*]])
+; CHECK:   %[[bb11:.*]] = getelementptr inbounds i8, ptr %[[p]], i64 3
+; CHECK:   %[[v2:.*]] = load i8, ptr %[[bb11]], align 1
+; CHECK:   call void @consume(i8 noundef signext %[[v2]])
+
+declare void @consume(i8 noundef signext) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare i8 @llvm.bpf.getelementptr.and.load.i8(ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #3
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nounwind willreturn }
+attributes #4 = { memory(argmem: read) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 1}
+!3 = !{!"foo", !4, i64 0, !4, i64 1}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain.ll
new file mode 100644
index 000000000000000..d6ffb270529a121
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-chain.ll
@@ -0,0 +1,73 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check unroll of getelementptr.and.load when several field indexes
+; are specified in a chain.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void buz(struct foo *p) {
+;      consume(p->b.bb);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %bb1 = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.load.i32
+      (ptr readonly elementtype(%struct.foo) %p,
+       i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1, i32 immarg 1)
+    #4, !tbaa !2
+  call void @consume(i32 noundef %bb1)
+  ret void
+}
+
+; CHECK: define dso_local void @buz(ptr noundef %[[p:.*]])
+; CHECK:   %[[bb11:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 1, i32 1
+; CHECK:   %[[v2:.*]] = load i32, ptr %[[bb11]], align 4
+; CHECK:   call void @consume(i32 noundef %[[v2]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare i32 @llvm.bpf.getelementptr.and.load.i32(ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #3
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nounwind willreturn }
+attributes #4 = { memory(argmem: read) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 8}
+!3 = !{!"foo", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"bar", !4, i64 0, !4, i64 4}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-simple.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-simple.ll
new file mode 100644
index 000000000000000..ae19dd7ad98d894
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-simple.ll
@@ -0,0 +1,65 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check unroll of getelementptr.and.load.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;      int b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p){
+;      consume(p->b);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %b1 = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.load.i32
+      (ptr readonly elementtype(%struct.foo) %p,
+       i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+    #4, !tbaa !2
+  call void @consume(i32 noundef %b1)
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr noundef %[[p:.*]]) #[[v1:.*]] {
+; CHECK:        %[[b11:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 1
+; CHECK-NEXT:   %[[v2:.*]] = load i32, ptr %[[b11]], align 4
+; CHECK-NEXT:   call void @consume(i32 noundef %[[v2]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare i32 @llvm.bpf.getelementptr.and.load.i32(ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #3
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nounwind willreturn }
+attributes #4 = { memory(argmem: read) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-volatile.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-volatile.ll
new file mode 100644
index 000000000000000..d9634a3fc3a9c8e
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-undo-volatile.ll
@@ -0,0 +1,64 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that unroll of getelementptr.and.load restores volatile.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;      volatile int b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p){
+;      consume(p->b);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %b1 = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.load.i32
+      (ptr elementtype(%struct.foo) %p,
+       i1 true, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1),
+    !tbaa !2
+  call void @consume(i32 noundef %b1)
+  ret void
+}
+
+; CHECK: define dso_local void @bar(ptr noundef %[[p:.*]])
+; CHECK:   %[[b11:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 1
+; CHECK:   %[[v2:.*]] = load volatile i32, ptr %[[b11]], align 4
+; CHECK:   call void @consume(i32 noundef %[[v2]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare i32 @llvm.bpf.getelementptr.and.load.i32(ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #3
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nounwind willreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-union-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-union-pai.ll
new file mode 100644
index 000000000000000..f90e3c54b072772
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-union-pai.ll
@@ -0,0 +1,110 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    #define __pai __attribute__((preserve_access_index))
+;    
+;    struct foo {
+;      char a[10];
+;    } __pai;
+;    
+;    struct bar {
+;      int a;
+;      int b;
+;    } __pai;
+;    
+;    union buz {
+;      struct foo a;
+;      struct bar b;
+;    } __pai __ctx;
+;    
+;    int quux(union buz *p) {
+;      return p->b.b;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -debug-info-kind=limited -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local i32 @quux(ptr noundef %p) #0 !dbg !5 {
+entry:
+  call void @llvm.dbg.value(metadata ptr %p, metadata !26, metadata !DIExpression()), !dbg !27
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p), !dbg !28 ; marker call wrapping the union pointer %p
+  %1 = call ptr @llvm.preserve.union.access.index.p0.p0(ptr %0, i32 1), !dbg !28, !llvm.preserve.access.index !10 ; p->b: union member index 1; !10 is the debug type of union 'buz'
+  %2 = call ptr @llvm.preserve.struct.access.index.p0.p0(ptr elementtype(%struct.bar) %1, i32 1, i32 1), !dbg !29, !llvm.preserve.access.index !21 ; .b: field index 1 of struct 'bar' (debug type !21)
+  %3 = load i32, ptr %2, align 4, !dbg !29, !tbaa !30 ; the load that the expected output replaces with a getelementptr.and.load call
+  ret i32 %3, !dbg !33
+}
+
+; CHECK:      define dso_local i32 @quux(ptr noundef %[[p:.*]]) {{.*}} {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.dbg.value
+; CHECK-NEXT:   %[[v5:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:       (ptr readonly elementtype(%struct.bar) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+; CHECK-SAME:      #[[v6:.*]], !tbaa
+; CHECK-NEXT:   ret i32 %[[v5]]
+; CHECK-NEXT: }
+; CHECK:      attributes #[[v6]] = { memory(argmem: read) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.union.access.index.p0.p0(ptr, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.struct.access.index.p0.p0(ptr, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "some-file.c", directory: "/some/dir/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 1, !"wchar_size", i32 4}
+!4 = !{!"clang"}
+!5 = distinct !DISubprogram(name: "quux", scope: !1, file: !1, line: 18, type: !6, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !25)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !9}
+!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
+!10 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "buz", file: !1, line: 13, size: 96, elements: !11)
+!11 = !{!12, !20}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !10, file: !1, line: 14, baseType: !13, size: 80)
+!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 4, size: 80, elements: !14)
+!14 = !{!15}
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !13, file: !1, line: 5, baseType: !16, size: 80)
+!16 = !DICompositeType(tag: DW_TAG_array_type, baseType: !17, size: 80, elements: !18)
+!17 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!18 = !{!19}
+!19 = !DISubrange(count: 10)
+!20 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !10, file: !1, line: 15, baseType: !21, size: 64)
+!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "bar", file: !1, line: 8, size: 64, elements: !22)
+!22 = !{!23, !24}
+!23 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !21, file: !1, line: 9, baseType: !8, size: 32)
+!24 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !21, file: !1, line: 10, baseType: !8, size: 32, offset: 32)
+!25 = !{!26}
+!26 = !DILocalVariable(name: "p", arg: 1, scope: !5, file: !1, line: 18, type: !9)
+!27 = !DILocation(line: 0, scope: !5)
+!28 = !DILocation(line: 19, column: 13, scope: !5)
+!29 = !DILocation(line: 19, column: 15, scope: !5)
+!30 = !{!31, !31, i64 0}
+!31 = !{!"omnipotent char", !32, i64 0}
+!32 = !{!"Simple C/C++ TBAA"}
+!33 = !DILocation(line: 19, column: 3, scope: !5)

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-unroll-inline.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-unroll-inline.ll
new file mode 100644
index 000000000000000..78172cd17dca485
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-unroll-inline.ll
@@ -0,0 +1,108 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check position of bpf-preserve-static-offset pass in the pipeline:
+; - preserve.static.offset call is preserved if address is passed as
+;   a parameter to an inline-able function;
+; - second bpf-preserve-static-offset pass (after inlining) should introduce
+;   getelementptr.and.load call using the preserved marker after loop
+;   unrolling;
+; - readonly and tbaa attributes should allow replacement of
+;   getelementptr.and.load calls by CSE transformation.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;      int b[4];
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    static inline void bar(int * restrict p) {
+;        consume(p[1]);
+;    }
+;    
+;    void quux(struct foo *p){
+;      unsigned long i = 0;
+;    #pragma clang loop unroll(full)
+;      while (i < 2) {
+;        bar(p->b);
+;        ++i;
+;      }
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, [4 x i32] }
+
+; Function Attrs: nounwind
+define dso_local void @quux(ptr noundef %p) #0 { ; caller: loops twice, handing a pointer derived from the marked %p to @bar
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %while.body ]
+  %cmp = icmp ult i64 %i.0, 2 ; two iterations; loop metadata !2 requests full unrolling
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call wrapping %p
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1 ; address of field 1, the [4 x i32] array
+  %arraydecay = getelementptr inbounds [4 x i32], ptr %b, i64 0, i64 0
+  call void @bar(ptr noundef %arraydecay) ; the load itself happens inside @bar, so it only meets the marker after inlining
+  %inc = add i64 %i.0, 1
+  br label %while.cond, !llvm.loop !2
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: inlinehint nounwind
+define internal void @bar(ptr noalias noundef %p) #2 { ; helper called from @quux; attribute group #2 carries inlinehint
+entry:
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 1 ; &p[1]
+  %0 = load i32, ptr %arrayidx, align 4, !tbaa !5
+  call void @consume(i32 noundef %0)
+  ret void
+}
+
+; CHECK:      define dso_local void @quux(ptr nocapture noundef readonly %[[p:.*]])
+; CHECK:        %[[v1:.*]] = tail call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:       (ptr readonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i64 immarg 0, i32 immarg 1, i64 immarg 1)
+; CHECK:        tail call void @consume(i32 noundef %[[v1]])
+; CHECK:        tail call void @consume(i32 noundef %[[v1]])
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #3
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+declare void @consume(i32 noundef) #4
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { inlinehint nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #4 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.mustprogress"}
+!4 = !{!"llvm.loop.unroll.full"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-unroll.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-unroll.ll
new file mode 100644
index 000000000000000..7c3303342bb6db9
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-unroll.ll
@@ -0,0 +1,95 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check position of bpf-preserve-static-offset pass in the pipeline:
+; preserve.static.offset call should be preserved long enough to allow
+; introduction of getelementptr.and.load after loop unrolling.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;      int b[4];
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p){
+;      unsigned long i = 0;
+;    #pragma clang loop unroll(full)
+;      while (i < 2)
+;        consume(p->b[i++]);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, [4 x i32] }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 { ; reads p->b[i] for i = 0, 1 and passes each value to @consume
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %while.body ]
+  %cmp = icmp ult i64 %i.0, 2 ; two iterations; loop metadata !6 requests full unrolling
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call wrapping %p
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1 ; address of field 1, the [4 x i32] array
+  %inc = add i64 %i.0, 1
+  %arrayidx = getelementptr inbounds [4 x i32], ptr %b, i64 0, i64 %i.0 ; index is the loop counter, so it only becomes a constant once the loop is unrolled
+  %1 = load i32, ptr %arrayidx, align 4, !tbaa !2
+  call void @consume(i32 noundef %1)
+  br label %while.cond, !llvm.loop !6
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr nocapture noundef readonly %[[p:.*]])
+; CHECK:        %[[v1:.*]] = tail call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:       (ptr readonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i64 immarg 0, i32 immarg 1, i64 immarg 0)
+; CHECK-SAME:      #[[attrs:.*]], !tbaa
+; CHECK-NEXT:   tail call void @consume(i32 noundef %[[v1]])
+; CHECK-NEXT:   %[[v2:.*]] = tail call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:       (ptr readonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i64 immarg 0, i32 immarg 1, i64 immarg 1)
+; CHECK-SAME:      #[[attrs]], !tbaa
+; CHECK-NEXT:   tail call void @consume(i32 noundef %[[v2]])
+; CHECK:      attributes #[[attrs]] = { memory(argmem: read) }
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+declare void @consume(i32 noundef) #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #3
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = distinct !{!6, !7, !8}
+!7 = !{!"llvm.loop.mustprogress"}
+!8 = !{!"llvm.loop.unroll.full"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-volatile.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-volatile.ll
new file mode 100644
index 000000000000000..819a4b31fb23de1
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-volatile.ll
@@ -0,0 +1,62 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check handling of a volatile load instruction by bpf-preserve-static-offset.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int _;
+;      volatile int a;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p) {
+;      consume(p->a);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 { ; test input: volatile read of field 1 through a marked pointer
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call wrapping %p
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  %1 = load volatile i32, ptr %a, align 4, !tbaa !2 ; the volatile load under test
+  call void @consume(i32 noundef %1)
+  ret void
+}
+
+; CHECK:      %[[a1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:    @llvm.bpf.getelementptr.and.load.i32
+; CHECK-SAME:      (ptr elementtype(%struct.foo) %{{[^,]+}},
+; CHECK-SAME:       i1 true, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+;                   ^^^^^^^^
+;                   volatile
+; CHECK-NOT:  #{{[0-9]+}}
+; CHECK-NEXT: call void @consume(i32 noundef %[[a1]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-zero.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-zero.ll
new file mode 100644
index 000000000000000..681c9640cbb87b6
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-zero.ll
@@ -0,0 +1,57 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that loads from zero offset are not modified by bpf-preserve-static-offset.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p) {
+;      consume(p->a);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 { ; test input: read of the only field, located at offset 0
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call; it no longer appears in the expected output
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 0 ; all-zero indices
+  %1 = load i32, ptr %a, align 4, !tbaa !2 ; expected to stay a plain load
+  call void @consume(i32 noundef %1)
+  ret void
+}
+
+; CHECK:      entry:
+; CHECK-NEXT:   %[[a:.*]] = getelementptr inbounds %struct.foo, ptr %[[p:.*]], i32 0, i32 0
+; CHECK-NEXT:   %[[v2:.*]] = load i32, ptr %[[a]], align 4, !tbaa
+; CHECK-NEXT:   call void @consume(i32 noundef %[[v2]])
+
+declare void @consume(i32 noundef) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"foo", !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-align.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-align.ll
new file mode 100644
index 000000000000000..667f8f5a8d8b49d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-align.ll
@@ -0,0 +1,59 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check handling of a store instruction for a field with non-standard
+; alignment by bpf-preserve-static-offset.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    typedef int aligned_int __attribute__((aligned(128)));
+;    
+;    struct foo {
+;      int _;
+;      aligned_int a;
+;    } __ctx;
+;    
+;    void bar(struct foo *p) {                             
+;      p->a = 7;                                      
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, [124 x i8], i32, [124 x i8] }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 { ; test input: store to an over-aligned field through a marked pointer
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call wrapping %p
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 2 ; field index 2 because index 1 is the [124 x i8] padding array
+  store i32 7, ptr %a, align 128, !tbaa !2 ; align 128 from the aligned(128) typedef; the expected call carries 'i8 7', presumably log2(128)
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr nocapture noundef writeonly %[[p:.*]])
+; CHECK:        tail call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 7,
+; CHECK-SAME:        ptr writeonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 7, i1 true, i32 immarg 0, i32 immarg 2)
+; CHECK-SAME:      #[[v2:.*]], !tbaa
+; CHECK:      attributes #[[v2]] = { memory(argmem: write) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 128}
+!3 = !{!"foo", !4, i64 0, !4, i64 128}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-atomic.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-atomic.ll
new file mode 100644
index 000000000000000..443966337b9dae5
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-atomic.ll
@@ -0,0 +1,60 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check handling of an atomic store instruction by bpf-preserve-static-offset.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int _;
+;      int a;
+;    } __ctx;
+;    
+;    void bar(struct foo *p) {                         
+;      int r;                                          
+;      r = 7;
+;      __atomic_store(&p->a, &r, 3);
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 { ; test input: atomic store to field 1 through a marked pointer
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call wrapping %p
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  store atomic i32 7, ptr %a release, align 4 ; release-ordered atomic store; the expected call carries 'i8 5' in the slot after the first i1, presumably the ordering
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr nocapture noundef %[[p:.*]])
+; CHECK:        tail call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 7,
+; CHECK-SAME:        ptr elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 5, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+; CHECK-NOT:  #{{[0-9]+}}
+; CHECK-NEXT:   ret void
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-2.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-2.ll
new file mode 100644
index 000000000000000..49f3fa5b83837d0
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-2.ll
@@ -0,0 +1,77 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds GEP chains that end with a
+; getelementptr.and.store call.
+;
+; Source (modified by hand):
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    void buz(struct foo *p) {
+;      p->b.bb = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+;
+; And modified to fold last getelementptr/store as a single
+; getelementptr.and.store.
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 { ; test input: a plain GEP feeding an already-folded store intrinsic call
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call wrapping %p
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1 ; outer GEP that the expected output merges into the call below
+  call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.store.i32
+      (i32 42,
+       ptr writeonly elementtype(%struct.bar) %b,
+       i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1) ; indices here are relative to %struct.bar; the expected output rebases them onto %struct.foo as 0, 1, 1
+    #3, !tbaa !2
+  ret void
+}
+
+; CHECK:      define dso_local void @buz(ptr noundef %[[p:.*]])
+; CHECK:        call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 42,
+; CHECK-SAME:        ptr writeonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1, i32 immarg 1)
+; CHECK-SAME:      #[[v2:.*]], !tbaa
+; CHECK:      attributes #[[v2]] = { memory(argmem: write) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare void @llvm.bpf.getelementptr.and.store.i32(i32, ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn }
+attributes #3 = { memory(argmem: write) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 8}
+!3 = !{!"foo", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"bar", !4, i64 0, !4, i64 4}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-oob.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-oob.ll
new file mode 100644
index 000000000000000..e2878f09130358a
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-oob.ll
@@ -0,0 +1,67 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset keeps track of 'inbounds' flags while
+; folding a chain of GEP instructions.
+;
+; Source (IR modified by hand):
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    void buz(struct foo *p) {
+;      p->b.bb = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+;
+; Modified to remove the 'inbounds' flag from one of the GEP instructions.
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 { ; test input: two-step GEP chain where only the first step is 'inbounds'
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call wrapping %p
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  %bb = getelementptr %struct.bar, ptr %b, i32 0, i32 1 ; deliberately lacks 'inbounds'; the expected call has 'i1 false' in its last fixed flag slot
+  store i32 42, ptr %bb, align 4, !tbaa !2
+  ret void
+}
+
+; CHECK:      define dso_local void @buz(ptr nocapture noundef writeonly %[[p:.*]])
+; CHECK:        tail call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 42,
+; CHECK-SAME:        ptr writeonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 false, i32 immarg 0, i32 immarg 1, i32 immarg 1)
+; CHECK-SAME:      #[[v2:.*]], !tbaa
+; CHECK:      attributes #[[v2]] = { memory(argmem: write) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 8}
+!3 = !{!"foo", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"bar", !4, i64 0, !4, i64 4}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-u8-oob.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-u8-oob.ll
new file mode 100644
index 000000000000000..a337325466776f4
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-u8-oob.ll
@@ -0,0 +1,67 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds a chain of GEP instructions.
+; The GEP chain in this example has type mismatch and thus is
+; folded as i8 access.
+;
+; Source (modified by hand):
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      char aa;
+;      char bb;
+;    };
+;    
+;    struct bar {
+;      char a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    void buz(struct bar *p) {
+;      ((struct foo *)(((char*)&p->b) + 1))->bb = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+;
+; Modified to remove the 'inbounds' flag from one of the getelementptr instructions.
+
+%struct.bar = type { i8, %struct.foo }
+%struct.foo = type { i8, i8 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 { ; test input: struct GEP, byte GEP, struct GEP, then an i8 store
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p) ; marker call wrapping %p
+  %b = getelementptr inbounds %struct.bar, ptr %0, i32 0, i32 1
+  %add.ptr = getelementptr i8, ptr %b, i64 1 ; byte-wise step, deliberately without 'inbounds'
+  %bb = getelementptr inbounds %struct.foo, ptr %add.ptr, i32 0, i32 1
+  store i8 42, ptr %bb, align 1, !tbaa !2 ; expected output is a single i8-typed access with constant index 3 and the inbounds flag cleared
+  ret void
+}
+
+; CHECK:      define dso_local void @buz(ptr nocapture noundef writeonly %[[p:.*]])
+; CHECK:        tail call void (i8, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i8
+; CHECK-SAME:       (i8 42,
+; CHECK-SAME:        ptr writeonly elementtype(i8) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 0, i1 false, i64 immarg 3)
+; CHECK-SAME:      #[[v2:.*]], !tbaa ![[v3:.*]]
+; CHECK:      attributes #[[v2]] = { memory(argmem: write) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 1}
+!3 = !{!"foo", !4, i64 0, !4, i64 1}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-u8.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-u8.ll
new file mode 100644
index 000000000000000..92740603ae69b7e
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain-u8.ll
@@ -0,0 +1,68 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds chain of GEP instructions.
+; The GEP chain in this example has type mismatch and thus is
+; folded as i8 access.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      char aa;
+;      char bb;
+;    };
+;    
+;    struct bar {
+;      char a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    void buz(struct bar *p) {
+;      ((struct foo *)(((char*)&p->b) + 1))->bb = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.bar = type { i8, %struct.foo }
+%struct.foo = type { i8, i8 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.bar, ptr %0, i32 0, i32 1
+  %add.ptr = getelementptr inbounds i8, ptr %b, i64 1
+;                                   ~~
+;         these types do not match, thus GEP chain is folded as an offset
+;                              ~~~~~~~~~~~
+  %bb = getelementptr inbounds %struct.foo, ptr %add.ptr, i32 0, i32 1
+  store i8 42, ptr %bb, align 1, !tbaa !2
+  ret void
+}
+
+; CHECK:      define dso_local void @buz(ptr nocapture noundef writeonly %[[p:.*]])
+; CHECK:        tail call void (i8, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i8
+; CHECK-SAME:       (i8 42,
+; CHECK-SAME:        ptr writeonly elementtype(i8) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 0, i1 true, i64 immarg 3)
+; CHECK-SAME:      #[[v2:.*]], !tbaa ![[v3:.*]]
+; CHECK:      attributes #[[v2]] = { memory(argmem: write) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 1}
+!3 = !{!"foo", !4, i64 0, !4, i64 1}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain.ll
new file mode 100644
index 000000000000000..d4c90616bf5cbf8
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-chain.ll
@@ -0,0 +1,64 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that bpf-preserve-static-offset folds chain of GEP instructions.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    void buz(struct foo *p) {
+;      p->b.bb = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  %bb = getelementptr inbounds %struct.bar, ptr %b, i32 0, i32 1
+  store i32 42, ptr %bb, align 4, !tbaa !2
+  ret void
+}
+
+; CHECK:      define dso_local void @buz(ptr nocapture noundef writeonly %[[p:.*]])
+; CHECK:        tail call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 42,
+; CHECK-SAME:        ptr writeonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1, i32 immarg 1)
+; CHECK-SAME:      #[[v2:.*]], !tbaa
+; CHECK:      attributes #[[v2]] = { memory(argmem: write) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 8}
+!3 = !{!"foo", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"bar", !4, i64 0, !4, i64 4}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-pai.ll
new file mode 100644
index 000000000000000..b22b2683682606e
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-pai.ll
@@ -0,0 +1,136 @@
+; RUN: opt -passes=bpf-preserve-static-offset -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    #define __pai __attribute__((preserve_access_index))
+;    
+;    struct foo {
+;      char a[10];
+;    } __pai;
+;    
+;    struct bar {
+;      int a;
+;      int b;
+;    } __pai;
+;    
+;    struct buz {
+;      int _1;
+;      int _2;
+;      int _3;
+;      union {
+;        struct foo a;
+;        struct bar b[7];
+;      };
+;    } __pai __ctx;
+;    
+;    void quux(struct buz *p) {
+;      p->b[5].b = 42;
+;    }
+;    
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes \
+;         -debug-info-kind=limited -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.buz = type { i32, i32, i32, %union.anon }
+%union.anon = type { [7 x %struct.bar] }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @quux(ptr noundef %p) #0 !dbg !31 {
+entry:
+  call void @llvm.dbg.value(metadata ptr %p, metadata !36, metadata !DIExpression()), !dbg !37
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p), !dbg !38
+  %1 = call ptr @llvm.preserve.struct.access.index.p0.p0(ptr elementtype(%struct.buz) %0, i32 3, i32 3), !dbg !38, !llvm.preserve.access.index !4
+  %2 = call ptr @llvm.preserve.union.access.index.p0.p0(ptr %1, i32 1), !dbg !38, !llvm.preserve.access.index !3
+  %3 = call ptr @llvm.preserve.array.access.index.p0.p0(ptr elementtype([7 x %struct.bar]) %2, i32 1, i32 5), !dbg !39, !llvm.preserve.access.index !21
+  %4 = call ptr @llvm.preserve.struct.access.index.p0.p0(ptr elementtype(%struct.bar) %3, i32 1, i32 1), !dbg !40, !llvm.preserve.access.index !22
+  store i32 42, ptr %4, align 4, !dbg !41, !tbaa !42
+  ret void, !dbg !45
+}
+
+; CHECK:      define dso_local void @quux(ptr noundef %[[p:.*]]) {{.*}} {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.dbg.value
+; CHECK-NEXT:   call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 42,
+; CHECK-SAME:        ptr writeonly elementtype(i8) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i64 immarg 56)
+; CHECK-SAME:      #[[v5:.*]], !tbaa
+; CHECK-NEXT:   ret void, !dbg
+; CHECK-NEXT: }
+; CHECK:      attributes #[[v5]] = { memory(argmem: write) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.struct.access.index.p0.p0(ptr, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.union.access.index.p0.p0(ptr, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.preserve.array.access.index.p0.p0(ptr, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!28, !29}
+!llvm.ident = !{!30}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "some-file.c", directory: "/some/dir/")
+!2 = !{!3, !21}
+!3 = distinct !DICompositeType(tag: DW_TAG_union_type, scope: !4, file: !1, line: 17, size: 448, elements: !11)
+!4 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "buz", file: !1, line: 13, size: 544, elements: !5)
+!5 = !{!6, !8, !9, !10}
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "_1", scope: !4, file: !1, line: 14, baseType: !7, size: 32)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !DIDerivedType(tag: DW_TAG_member, name: "_2", scope: !4, file: !1, line: 15, baseType: !7, size: 32, offset: 32)
+!9 = !DIDerivedType(tag: DW_TAG_member, name: "_3", scope: !4, file: !1, line: 16, baseType: !7, size: 32, offset: 64)
+!10 = !DIDerivedType(tag: DW_TAG_member, scope: !4, file: !1, line: 17, baseType: !3, size: 448, offset: 96)
+!11 = !{!12, !20}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !3, file: !1, line: 18, baseType: !13, size: 80)
+!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 4, size: 80, elements: !14)
+!14 = !{!15}
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !13, file: !1, line: 5, baseType: !16, size: 80)
+!16 = !DICompositeType(tag: DW_TAG_array_type, baseType: !17, size: 80, elements: !18)
+!17 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!18 = !{!19}
+!19 = !DISubrange(count: 10)
+!20 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !3, file: !1, line: 19, baseType: !21, size: 448)
+!21 = !DICompositeType(tag: DW_TAG_array_type, baseType: !22, size: 448, elements: !26)
+!22 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "bar", file: !1, line: 8, size: 64, elements: !23)
+!23 = !{!24, !25}
+!24 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !22, file: !1, line: 9, baseType: !7, size: 32)
+!25 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !22, file: !1, line: 10, baseType: !7, size: 32, offset: 32)
+!26 = !{!27}
+!27 = !DISubrange(count: 7)
+!28 = !{i32 2, !"Debug Info Version", i32 3}
+!29 = !{i32 1, !"wchar_size", i32 4}
+!30 = !{!"clang"}
+!31 = distinct !DISubprogram(name: "quux", scope: !1, file: !1, line: 23, type: !32, scopeLine: 23, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !35)
+!32 = !DISubroutineType(types: !33)
+!33 = !{null, !34}
+!34 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !4, size: 64)
+!35 = !{!36}
+!36 = !DILocalVariable(name: "p", arg: 1, scope: !31, file: !1, line: 23, type: !34)
+!37 = !DILocation(line: 0, scope: !31)
+!38 = !DILocation(line: 24, column: 6, scope: !31)
+!39 = !DILocation(line: 24, column: 3, scope: !31)
+!40 = !DILocation(line: 24, column: 11, scope: !31)
+!41 = !DILocation(line: 24, column: 13, scope: !31)
+!42 = !{!43, !43, i64 0}
+!43 = !{!"omnipotent char", !44, i64 0}
+!44 = !{!"Simple C/C++ TBAA"}
+!45 = !DILocation(line: 25, column: 1, scope: !31)

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-simple.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-simple.ll
new file mode 100644
index 000000000000000..a603ad86673943b
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-simple.ll
@@ -0,0 +1,60 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check handling of a simple store instruction by bpf-preserve-static-offset.
+; Verify:
+; - presence of gep.and.store intrinsic call
+; - correct attributes for intrinsic call
+; - presence of tbaa annotations
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int _;
+;      int a;
+;    } __ctx;
+;    
+;    void bar(struct foo *p) {
+;      p->a = 7;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  store i32 7, ptr %a, align 4, !tbaa !2
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr nocapture noundef writeonly %[[p:.*]])
+; CHECK:        tail call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 7,
+; CHECK-SAME:        ptr writeonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+; CHECK-SAME:      #[[v2:.*]], !tbaa
+; CHECK:      attributes #[[v2]] = { memory(argmem: write) }
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-align.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-align.ll
new file mode 100644
index 000000000000000..7996fe0d1bb28dc
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-align.ll
@@ -0,0 +1,62 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that getelementptr.and.store unroll restores alignment spec.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    typedef int aligned_int __attribute__((aligned(128)));
+;    
+;    struct foo {
+;      int _;
+;      aligned_int a;
+;    } __ctx;
+;    
+;    void bar(struct foo *p) {
+;      p->a = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, [124 x i8], i32, [124 x i8] }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.store.i32
+      (i32 42,
+       ptr writeonly elementtype(%struct.foo) %p,
+       i1 false, i8 0, i8 1, i8 7, i1 true, i32 immarg 0, i32 immarg 2)
+    #3, !tbaa !2
+  ret void
+}
+
+; CHECK: define dso_local void @bar(ptr noundef %[[p:.*]])
+; CHECK:   %[[v2:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 2
+; CHECK:   store i32 42, ptr %[[v2]], align 128
+; CHECK:   ret void
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare void @llvm.bpf.getelementptr.and.store.i32(i32, ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn }
+attributes #3 = { memory(argmem: write) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 128}
+!3 = !{!"foo", !4, i64 0, !4, i64 128}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain-oob.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain-oob.ll
new file mode 100644
index 000000000000000..d2731d32ed4cd6c
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain-oob.ll
@@ -0,0 +1,67 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that getelementptr.and.store unroll can skip 'inbounds' flag.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    void buz(struct foo *p) {
+;      p->b.bb = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.store.i32
+      (i32 42,
+       ptr writeonly elementtype(%struct.foo) %p,
+       i1 false, i8 0, i8 1, i8 2, i1 false, i32 immarg 0, i32 immarg 1, i32 immarg 1)
+    #3, !tbaa !2
+  ret void
+}
+
+; CHECK: define dso_local void @buz(ptr noundef %[[p:.*]])
+; CHECK:   %[[v2:.*]] = getelementptr %struct.foo, ptr %[[p]], i32 0, i32 1, i32 1
+; CHECK:   store i32 42, ptr %[[v2]], align 4
+; CHECK:   ret void
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare void @llvm.bpf.getelementptr.and.store.i32(i32, ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn }
+attributes #3 = { memory(argmem: write) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 8}
+!3 = !{!"foo", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"bar", !4, i64 0, !4, i64 4}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain-u8.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain-u8.ll
new file mode 100644
index 000000000000000..184c5c334905926
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain-u8.ll
@@ -0,0 +1,62 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check unroll of getelementptr.and.store when direct memory offset is
+; used instead of field indexes.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      char aa;
+;      char bb;
+;    };
+;    
+;    struct bar {
+;      char a;
+;      struct foo b;
+;    } __ctx;
+;    
+;    void buz(struct bar *p) {
+;      ((struct foo *)(((char*)&p->b) + 1))->bb = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  call void (i8, ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.store.i8
+      (i8 42,
+       ptr writeonly elementtype(i8) %p,
+       i1 false, i8 0, i8 1, i8 0, i1 true, i64 immarg 3)
+    #3, !tbaa !2
+  ret void
+}
+
+; CHECK: define dso_local void @buz(ptr noundef %[[p:.*]])
+; CHECK:   %[[v2:.*]] = getelementptr inbounds i8, ptr %[[p]], i64 3
+; CHECK:   store i8 42, ptr %[[v2]], align 1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare void @llvm.bpf.getelementptr.and.store.i8(i8, ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn }
+attributes #3 = { memory(argmem: write) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 1}
+!3 = !{!"foo", !4, i64 0, !4, i64 1}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain.ll
new file mode 100644
index 000000000000000..2899ab03f50d4f5
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-chain.ll
@@ -0,0 +1,68 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check unroll of getelementptr.and.store when several field indexes
+; are specified in a chain.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct bar {
+;      int aa;
+;      int bb;
+;    };
+;    
+;    struct foo {
+;      int a;
+;      struct bar b;
+;    } __ctx;
+;    
+;    void buz(struct foo *p) {
+;      p->b.bb = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, %struct.bar }
+%struct.bar = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @buz(ptr noundef %p) #0 {
+entry:
+  call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.store.i32
+      (i32 42,
+       ptr writeonly elementtype(%struct.foo) %p,
+       i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1, i32 immarg 1)
+    #3, !tbaa !2
+  ret void
+}
+
+; CHECK: define dso_local void @buz(ptr noundef %[[p:.*]])
+; CHECK:   %[[v2:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 1, i32 1
+; CHECK:   store i32 42, ptr %[[v2]], align 4
+; CHECK:   ret void
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare void @llvm.bpf.getelementptr.and.store.i32(i32, ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn }
+attributes #3 = { memory(argmem: write) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 8}
+!3 = !{!"foo", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"bar", !4, i64 0, !4, i64 4}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-simple.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-simple.ll
new file mode 100644
index 000000000000000..8ad5eae9847535b
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-simple.ll
@@ -0,0 +1,61 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check unroll of getelementptr.and.store.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;      int b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p){
+;      p->b = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.store.i32
+      (i32 42,
+       ptr writeonly elementtype(%struct.foo) %p,
+       i1 false, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1)
+    #3, !tbaa !2
+  ret void
+}
+
+; CHECK: define dso_local void @bar(ptr noundef %[[p:.*]])
+; CHECK:   %[[v2:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 1
+; CHECK:   store i32 42, ptr %[[v2]], align 4
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare void @llvm.bpf.getelementptr.and.store.i32(i32, ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn }
+attributes #3 = { memory(argmem: write) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-volatile.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-volatile.ll
new file mode 100644
index 000000000000000..79495732f972033
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-undo-volatile.ll
@@ -0,0 +1,61 @@
+; RUN: opt --bpf-check-and-opt-ir -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that unroll of getelementptr.and.store restores volatile.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;      volatile int b;
+;    } __ctx;
+;    
+;    extern void consume(int);
+;    
+;    void bar(struct foo *p){
+;      p->b = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=sroa,bpf-preserve-static-offset -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+    @llvm.bpf.getelementptr.and.store.i32
+      (i32 42,
+       ptr elementtype(%struct.foo) %p,
+       i1 true, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1),
+    !tbaa !2
+  ret void
+}
+
+; CHECK: define dso_local void @bar(ptr noundef %[[p:.*]])
+; CHECK: entry:
+; CHECK:   %[[v2:.*]] = getelementptr inbounds %struct.foo, ptr %[[p]], i32 0, i32 1
+; CHECK:   store volatile i32 42, ptr %[[v2]], align 4
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn
+declare void @llvm.bpf.getelementptr.and.store.i32(i32, ptr nocapture, i1 immarg, i8 immarg, i8 immarg, i8 immarg, i1 immarg, ...) #2
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-unroll-inline.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-unroll-inline.ll
new file mode 100644
index 000000000000000..161aded79eac3c3
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-unroll-inline.ll
@@ -0,0 +1,104 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check position of bpf-preserve-static-offset pass in the pipeline:
+; - preserve.static.offset call is preserved if address is passed as
+;   a parameter to an inline-able function;
+; - second bpf-preserve-static-offset pass (after inlining) should introduce
+;   getelementptr.and.store call using the preserved marker after loop
+;   unrolling;
+; - memory(argmem: write) and tbaa attributes should allow
+;   removing one getelementptr.and.store call.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;      int b[4];
+;    } __ctx;
+;    
+;    static inline void bar(int * restrict p, unsigned long i) {
+;      p[0] = i;
+;    }
+;    
+;    void quux(struct foo *p){
+;      unsigned long i = 0;
+;    #pragma clang loop unroll(full)
+;      while (i < 2) {
+;        bar(p->b, i);
+;        ++i;
+;      }
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, [4 x i32] }
+
+; Function Attrs: nounwind
+define dso_local void @quux(ptr noundef %p) #0 {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %while.body ]
+  %cmp = icmp ult i64 %i.0, 2
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  %arraydecay = getelementptr inbounds [4 x i32], ptr %b, i64 0, i64 0
+  call void @bar(ptr noundef %arraydecay, i64 noundef %i.0)
+  %inc = add i64 %i.0, 1
+  br label %while.cond, !llvm.loop !2
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: inlinehint nounwind
+define internal void @bar(ptr noalias noundef %p, i64 noundef %i) #2 {
+entry:
+  %conv = trunc i64 %i to i32
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 0
+  store i32 %conv, ptr %arrayidx, align 4, !tbaa !5
+  ret void
+}
+
+; CHECK:      define dso_local void @quux(ptr nocapture noundef writeonly %[[p:.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   tail call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 1,
+; CHECK-SAME:        ptr writeonly elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 false, i8 0, i8 1, i8 2, i1 true, i64 immarg 0, i32 immarg 1)
+; CHECK-NEXT:   ret void
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #3
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { inlinehint nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.mustprogress"}
+!4 = !{!"llvm.loop.unroll.full"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-volatile.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-volatile.ll
new file mode 100644
index 000000000000000..8b0493a38efa6fe
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-volatile.ll
@@ -0,0 +1,56 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check handling of a volatile store instruction by bpf-preserve-static-offset.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;      volatile int b;
+;    } __ctx;
+;    
+;    void bar(struct foo *p) {
+;      p->b = 42;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %b = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 1
+  store volatile i32 42, ptr %b, align 4, !tbaa !2
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr nocapture noundef %[[p:.*]])
+; CHECK:        tail call void (i32, ptr, i1, i8, i8, i8, i1, ...)
+; CHECK-SAME:     @llvm.bpf.getelementptr.and.store.i32
+; CHECK-SAME:       (i32 42,
+; CHECK-SAME:        ptr elementtype(%struct.foo) %[[p]],
+; CHECK-SAME:        i1 true, i8 0, i8 1, i8 2, i1 true, i32 immarg 0, i32 immarg 1),
+; CHECK-NOT:       #{{[0-9]+}}
+; CHECK-SAME:      !tbaa
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 4}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

diff  --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll
new file mode 100644
index 000000000000000..7f2a06af8d10f91
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll
@@ -0,0 +1,51 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o - %s | FileCheck %s
+;
+; Check that stores to a field at zero offset are not modified by bpf-preserve-static-offset.
+;
+; Source:
+;    #define __ctx __attribute__((preserve_static_offset))
+;    
+;    struct foo {
+;      int a;
+;    } __ctx;
+;    
+;    void bar(struct foo *p) {
+;      p->a = 0;
+;    }
+;
+; Compilation flag:
+;   clang -cc1 -O2 -triple bpf -S -emit-llvm -disable-llvm-passes -o - \
+;       | opt -passes=function(sroa) -S -o -
+
+%struct.foo = type { i32 }
+
+; Function Attrs: nounwind
+define dso_local void @bar(ptr noundef %p) #0 {
+entry:
+  %0 = call ptr @llvm.preserve.static.offset(ptr %p)
+  %a = getelementptr inbounds %struct.foo, ptr %0, i32 0, i32 0
+  store i32 0, ptr %a, align 4, !tbaa !2
+  ret void
+}
+
+; CHECK:      define dso_local void @bar(ptr nocapture noundef writeonly %[[p:.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   store i32 0, ptr %[[p]], align 4, !tbaa
+; CHECK-NEXT:   ret void
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr @llvm.preserve.static.offset(ptr readnone) #1
+
+attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"foo", !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}


        


More information about the llvm-commits mailing list