[llvm] [clang] [BPF] add cast_{user,kern} instructions (PR #79902)

via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 31 17:35:57 PST 2024


https://github.com/eddyz87 updated https://github.com/llvm/llvm-project/pull/79902

>From 449bc0e4578bc04813974b82e3bf98b5407f5461 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87 at gmail.com>
Date: Fri, 26 Jan 2024 04:18:32 +0200
Subject: [PATCH 1/3] [BPF] add cast_{user,kern} instructions

This commit aims to support BPF arena kernel side feature:
- arena is a memory region accessible from both
  BPF program and userspace;
- base pointers for this memory region differ between
  kernel and user spaces;
- `dst_reg = cast_user(src_reg, addr_space_no)`
  translates src_reg, kernel space pointer within arena,
  to dst_reg, equivalent user space pointer within arena
  (both pointers have identical offset from arena start),
  addr_space_no is an immediate constant, used to identify
  the particular arena;
- `dst_reg = cast_kern(src_reg, addr_space_no)`
  is similar but in opposite direction: converts user space arena
  pointer to kernel space arena pointer.

On the LLVM side, the goal is to have all arena pointers stored in
arena memory in user space format:
- assume that pointers with non-zero address space are pointers to
  arena memory;
- assume that arena is identified by address space number;
- assume that every BPF-side load or store from arena is done via
  pointer in user address space, thus convert base pointers using
  cast_kern;
- assume that every BPF-side store of arena pointer value is in kernel
  address space, thus convert stored pointers with cast_user.

Only load and store IR instructions are handled at the moment.

For example, the following C code:

```c

struct list {
  struct list __as *next;
  int i;
};

extern struct list __as *mklist(void);

struct list __as *push(int i, struct list __as *list) {
  struct list __as *elt = mklist();
  elt->i = i;
  elt->next = list;
  return elt;
}
```

Compiled to the following IR:

```llvm
  %call = tail call ptr addrspace(272) @mklist() #2
  %i1 = getelementptr inbounds %struct.list, ptr addrspace(272) %call, i64 0, i32 1
  store i32 %i, ptr addrspace(272) %i1, align 8, !tbaa !3
  store ptr addrspace(272) %list, ptr addrspace(272) %call, align 8
  ret ptr addrspace(272) %call
```

Is transformed to:

```llvm
  %list6 = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) %list, i32 2) ;; cast_user
  %call = tail call ptr addrspace(272) @mklist() #3
  %call4 = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) %call, i32 1) ;; cast_kern
  %i15 = getelementptr inbounds %struct.list, ptr addrspace(272) %call4, i64 0, i32 1
  store i32 %i, ptr addrspace(272) %i15, align 8, !tbaa !3
  store ptr addrspace(272) %list6, ptr addrspace(272) %call4, align 8
  ret ptr addrspace(272) %call
```

And compiled as:

```asm
  r6 = r2
  r7 = r1
  call mklist
  r1 = cast_kern(r0, 272)
  *(u32 *)(r1 + 8) = r7
  r2 = cast_user(r6, 272)
  *(u64 *)(r1 + 0) = r2
  exit
```

Internally:
- use a new intrinsic function to mark the conversions:
  `llvm.bpf.arena.cast(<pointer>, <cast_direction>)`,
  where `cast_direction` is an immediate describing whether
  operation is `cast_kern` (1) or `cast_user` (2);
- piggy-back `BPFCheckAndAdjustIR` pass to insert the above intrinsic
  calls for load and store instructions;
- modify `BPFInstrInfo.td` and `BPFISelLowering.cpp` to allow
  translation of new intrinsic:
  - define an SDNode type `BPFAddrSpace` to represent new intrinsic:
    - override `BPFTargetLowering::CollectTargetIntrinsicOperands()`
      method to add pointer address space as a parameter of intrinsic
      SDNode;
    - define `BPFTargetLowering::LowerINTRINSIC_WO_CHAIN()` called
      from `BPFTargetLowering::LowerOperation()` to lower intrinsic
      call to an SDNode;
  - define new instructions: `ADDR_SPACE_K`, `ADDR_SPACE_U`;
  - define patterns to lower `BPFAddrSpace` as `ADDR_SPACE_{KU}`.
---
 llvm/include/llvm/IR/IntrinsicsBPF.td         |   5 +
 llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp   |  87 ++++++++++
 llvm/lib/Target/BPF/BPFISelLowering.cpp       |  44 +++++
 llvm/lib/Target/BPF/BPFISelLowering.h         |   8 +-
 llvm/lib/Target/BPF/BPFInstrInfo.td           |  20 +++
 llvm/test/CodeGen/BPF/addr-space-builtin.ll   | 153 ++++++++++++++++++
 llvm/test/CodeGen/BPF/addr-space-gep-chain.ll |  34 ++++
 llvm/test/CodeGen/BPF/addr-space-insn.ll      |  15 ++
 llvm/test/CodeGen/BPF/addr-space-ku-chain.ll  |  56 +++++++
 .../BPF/addr-space-ku-for-same-base.ll        |  61 +++++++
 llvm/test/CodeGen/BPF/addr-space-phi.ll       |  68 ++++++++
 11 files changed, 550 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-builtin.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-gep-chain.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-insn.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-ku-chain.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-ku-for-same-base.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-phi.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index c7ec0916f1d1f..436cd0f577271 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -76,4 +76,9 @@ let TargetPrefix = "bpf" in {  // All intrinsics start with "llvm.bpf."
                          ImmArg    <ArgIndex<5>>, // alignment
                          ImmArg    <ArgIndex<6>>, // inbounds
                         ]>;
+  def int_bpf_arena_cast :
+              Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty],
+                        [IntrSpeculatable, IntrNoMem,
+                         NoCapture <ArgIndex<0>>,
+                         ImmArg    <ArgIndex<1>>]>;
 }
diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index 81effc9b1db46..a31c9045c0d0b 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -55,6 +55,7 @@ class BPFCheckAndAdjustIR final : public ModulePass {
   bool removeCompareBuiltin(Module &M);
   bool sinkMinMax(Module &M);
   bool removeGEPBuiltins(Module &M);
+  bool insertASpaceBuiltins(Module &M);
 };
 } // End anonymous namespace
 
@@ -416,11 +417,97 @@ bool BPFCheckAndAdjustIR::removeGEPBuiltins(Module &M) {
   return Changed;
 }
 
+static Instruction *aspaceWrapValue(DenseMap<Value *, Instruction *> &Cache,
+                                    Function *F, Value *ToWrap, unsigned Code) {
+  auto It = Cache.find(ToWrap);
+  if (It != Cache.end())
+    return It->getSecond();
+
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(ToWrap)) {
+    Value *Ptr = GEP->getPointerOperand();
+    Value *WrappedPtr = aspaceWrapValue(Cache, F, Ptr, Code);
+    auto *NewGEP = GEP->clone();
+    NewGEP->insertAfter(GEP);
+    NewGEP->setOperand(GEP->getPointerOperandIndex(), WrappedPtr);
+    NewGEP->setName(GEP->getName());
+    Cache[ToWrap] = NewGEP;
+    return NewGEP;
+  }
+
+  Module *M = F->getParent();
+  IRBuilder IB(F->getContext());
+  if (Instruction *InsnPtr = dyn_cast<Instruction>(ToWrap))
+    IB.SetInsertPoint(*InsnPtr->getInsertionPointAfterDef());
+  else
+    IB.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt());
+  Type *PtrTy = ToWrap->getType();
+  Function *ASpaceFn =
+      Intrinsic::getDeclaration(M, Intrinsic::bpf_arena_cast, {PtrTy, PtrTy});
+  auto *Call =
+      IB.CreateCall(ASpaceFn, {ToWrap, IB.getInt32(Code)}, ToWrap->getName());
+  Cache[ToWrap] = Call;
+  return Call;
+}
+
+// Wrap operand with a call to bpf.arena.cast() builtin
+static void aspaceWrapOperand(DenseMap<Value *, Instruction *> &Cache,
+                              Instruction *I, unsigned OpNum, unsigned Code) {
+  Value *OldOp = I->getOperand(OpNum);
+  if (OldOp->getType()->getPointerAddressSpace() == 0)
+    return;
+
+  Value *NewOp = aspaceWrapValue(Cache, I->getFunction(), OldOp, Code);
+  I->setOperand(OpNum, NewOp);
+  for (;;) {
+    auto *OldGEP = dyn_cast<GetElementPtrInst>(OldOp);
+    if (!OldGEP)
+      break;
+    if (!OldGEP->use_empty())
+      break;
+    OldOp = OldGEP->getPointerOperand();
+    OldGEP->eraseFromParent();
+  }
+}
+
+enum {
+  ASPACE_TO_KERNEL = 1,
+  ASPACE_TO_USER = 2,
+};
+
+bool BPFCheckAndAdjustIR::insertASpaceBuiltins(Module &M) {
+  bool Changed = false;
+  for (Function &F : M) {
+    DenseMap<Value *, Instruction *> ToKernelCache;
+    DenseMap<Value *, Instruction *> ToUserCache;
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : BB) {
+        if (auto *LD = dyn_cast<LoadInst>(&I)) {
+          aspaceWrapOperand(ToKernelCache, LD, LD->getPointerOperandIndex(),
+                            ASPACE_TO_KERNEL);
+          continue;
+        }
+        if (auto *ST = dyn_cast<StoreInst>(&I)) {
+          aspaceWrapOperand(ToKernelCache, ST, ST->getPointerOperandIndex(),
+                            ASPACE_TO_KERNEL);
+          Value *VO = ST->getValueOperand();
+          PointerType *VOTy = dyn_cast<PointerType>(VO->getType());
+          if (VOTy && VOTy->getAddressSpace() != 0)
+            aspaceWrapOperand(ToUserCache, ST, 0, ASPACE_TO_USER);
+          continue;
+        }
+      }
+    }
+    Changed |= !ToKernelCache.empty() || !ToUserCache.empty();
+  }
+  return Changed;
+}
+
 bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
   bool Changed = removePassThroughBuiltin(M);
   Changed = removeCompareBuiltin(M) || Changed;
   Changed = sinkMinMax(M) || Changed;
   Changed = removeGEPBuiltins(M) || Changed;
+  Changed = insertASpaceBuiltins(M) || Changed;
   return Changed;
 }
 
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 4d8ace7c1ece0..79cdc639530ff 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicsBPF.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -75,6 +76,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
 
+  setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
+
   // Set unsupported atomic operations as Custom so
   // we can emit better error messages than fatal error
   // from selectiondag.
@@ -137,6 +140,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
   }
 
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
   // Extended load operations for i1 types must be promoted
   for (MVT VT : MVT::integer_valuetypes()) {
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
@@ -315,6 +320,10 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerSDIVSREM(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::ADDRSPACECAST:
+    return LowerADDRSPACECAST(Op, DAG);
   }
 }
 
@@ -638,6 +647,39 @@ SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   return DAG.getMergeValues(Ops, SDLoc());
 }
 
+SDValue BPFTargetLowering::LowerADDRSPACECAST(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  fail(DL, DAG, "Address space casts are not supported by BPF back-end");
+  return Op.getOperand(0);
+}
+
+void BPFTargetLowering::CollectTargetIntrinsicOperands(
+    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
+  Function *Func = I.getCalledFunction();
+  if (!Func)
+    return;
+  if (Func->getIntrinsicID() == Intrinsic::bpf_arena_cast) {
+    unsigned ASpace = I.getOperand(0)->getType()->getPointerAddressSpace();
+    Ops.push_back(DAG.getTargetConstant(ASpace, SDLoc(), MVT::i64));
+  }
+  return;
+}
+
+SDValue BPFTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  unsigned IntNo = Op.getConstantOperandVal(0);
+  if (IntNo == Intrinsic::bpf_arena_cast) {
+    SDValue Ptr = Op.getOperand(1);
+    SDValue Code = Op.getOperand(2);
+    SDValue ASpace = Op.getOperand(3);
+    return DAG.getNode(BPFISD::ADDR_SPACE, DL, Op.getValueType(), Ptr, Code,
+                       ASpace);
+  }
+  return SDValue();
+}
+
 SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = Op.getOperand(0);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -687,6 +729,8 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "BPFISD::Wrapper";
   case BPFISD::MEMCPY:
     return "BPFISD::MEMCPY";
+  case BPFISD::ADDR_SPACE:
+    return "BPFISD::ADDR_SPACE";
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 819711b650c15..2b568c21bc28f 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -28,7 +28,8 @@ enum NodeType : unsigned {
   SELECT_CC,
   BR_CC,
   Wrapper,
-  MEMCPY
+  MEMCPY,
+  ADDR_SPACE,
 };
 }
 
@@ -65,6 +66,9 @@ class BPFTargetLowering : public TargetLowering {
                          EVT VT) const override;
 
   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
+  void CollectTargetIntrinsicOperands(const CallInst &I,
+                                      SmallVectorImpl<SDValue> &Ops,
+                                      SelectionDAG &DAG) const override;
 
 private:
   // Control Instruction Selection Features
@@ -75,6 +79,8 @@ class BPFTargetLowering : public TargetLowering {
 
   SDValue LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 7d443a3449014..b44ae35dd33f1 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -31,6 +31,9 @@ def SDT_BPFMEMCPY       : SDTypeProfile<0, 4, [SDTCisVT<0, i64>,
                                                SDTCisVT<1, i64>,
                                                SDTCisVT<2, i64>,
                                                SDTCisVT<3, i64>]>;
+def SDT_BPFAddrSpace    : SDTypeProfile<0, 3, [SDTCisPtrTy<0>,
+                                               SDTCisVT<1, i64>,
+                                               SDTCisVT<2, i64>]>;
 
 def BPFcall         : SDNode<"BPFISD::CALL", SDT_BPFCall,
                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
@@ -49,6 +52,7 @@ def BPFWrapper      : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>;
 def BPFmemcpy       : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY,
                              [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
                               SDNPMayStore, SDNPMayLoad]>;
+def BPFAddrSpace      : SDNode<"BPFISD::ADDR_SPACE", SDT_BPFAddrSpace>;
 def BPFIsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
 def BPFIsBigEndian    : Predicate<"!Subtarget->isLittleEndian()">;
 def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">;
@@ -418,8 +422,24 @@ let Predicates = [BPFHasMovsx] in {
                       "$dst = (s16)$src",
                       [(set GPR32:$dst, (sext_inreg GPR32:$src, i16))]>;
 }
+
+class ADDR_SPACE<int Code, string AsmPattern>
+    : ALU_RR<BPF_ALU64, BPF_MOV, 64,
+             (outs GPR:$dst),
+             (ins GPR:$src, i64imm:$imm),
+             AsmPattern,
+             []> {
+  bits<64> imm;
+  let Inst{47-32} = Code;
+  let Inst{31-0} = imm{31-0};
+}
+def ADDR_SPACE_K : ADDR_SPACE<1, "$dst = cast_kern($src, $imm)">;
+def ADDR_SPACE_U : ADDR_SPACE<2, "$dst = cast_user($src, $imm)">;
 }
 
+def : Pat<(BPFAddrSpace GPR:$src, 1, i64:$as), (ADDR_SPACE_K $src, $as)>;
+def : Pat<(BPFAddrSpace GPR:$src, 2, i64:$as), (ADDR_SPACE_U $src, $as)>;
+
 def FI_ri
     : TYPE_LD_ST<BPF_IMM.Value, BPF_DW.Value,
                  (outs GPR:$dst),
diff --git a/llvm/test/CodeGen/BPF/addr-space-builtin.ll b/llvm/test/CodeGen/BPF/addr-space-builtin.ll
new file mode 100644
index 0000000000000..abb57093e789c
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-builtin.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+; Generated from the following C code:
+
+; #define __uptr __attribute__((address_space(272)))
+;
+; void simple_store(void __uptr *foo) {
+;   *((volatile int __uptr *)(foo + 16)) = 0xdead;
+;   *((volatile int __uptr *)(foo + 12)) = 0xbeef;
+; }
+;
+; void separate_addr_store(void __uptr *foo, void __uptr *bar) {
+;   *((volatile int __uptr *)(foo + 16)) = 0xdead;
+;   *((volatile int __uptr *)(bar + 12)) = 0xbeef;
+; }
+;
+; void ptr_store(void __uptr *foo, void __uptr *bar) {
+;   *((volatile void __uptr * __uptr*)(foo + 16)) = bar + 16;
+;   *((volatile void __uptr * __uptr*)(foo + 8)) = bar + 8;
+; }
+;
+; void separate_ptr_store(void __uptr *foo, void __uptr *bar, void __uptr *buz) {
+;   *((volatile void __uptr * __uptr*)(foo + 16)) = bar;
+;   *((volatile void __uptr * __uptr*)(foo + 8)) = buz;
+; }
+;
+; int simple_load(int __uptr *foo) {
+;   return *foo;
+; }
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+; Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite)
+define dso_local void @simple_store(ptr addrspace(272) noundef %foo) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @simple_store(
+; CHECK-SAME: ptr addrspace(272) noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[FOO]], i32 1)
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP0]], i64 16
+; CHECK-NEXT:    store volatile i32 57005, ptr addrspace(272) [[ADD_PTR]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT:    [[ADD_PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP0]], i64 12
+; CHECK-NEXT:    store volatile i32 48879, ptr addrspace(272) [[ADD_PTR1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16
+  store volatile i32 57005, ptr addrspace(272) %add.ptr, align 4, !tbaa !3
+  %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 12
+  store volatile i32 48879, ptr addrspace(272) %add.ptr1, align 4, !tbaa !3
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite)
+define dso_local void @separate_addr_store(ptr addrspace(272) noundef %foo, ptr addrspace(272) noundef %bar) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @separate_addr_store(
+; CHECK-SAME: ptr addrspace(272) noundef [[FOO:%.*]], ptr addrspace(272) noundef [[BAR:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[BAR]], i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[FOO]], i32 1)
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP1]], i64 16
+; CHECK-NEXT:    store volatile i32 57005, ptr addrspace(272) [[ADD_PTR]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[ADD_PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP0]], i64 12
+; CHECK-NEXT:    store volatile i32 48879, ptr addrspace(272) [[ADD_PTR1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16
+  store volatile i32 57005, ptr addrspace(272) %add.ptr, align 4, !tbaa !3
+  %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %bar, i64 12
+  store volatile i32 48879, ptr addrspace(272) %add.ptr1, align 4, !tbaa !3
+  ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+define dso_local void @ptr_store(ptr addrspace(272) nocapture noundef writeonly %foo, ptr addrspace(272) noundef %bar) local_unnamed_addr #1 {
+; CHECK-LABEL: define dso_local void @ptr_store(
+; CHECK-SAME: ptr addrspace(272) nocapture noundef writeonly [[FOO:%.*]], ptr addrspace(272) noundef [[BAR:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[BAR]], i32 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[FOO]], i32 1)
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP0]], i64 16
+; CHECK-NEXT:    [[ADD_PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP1]], i64 16
+; CHECK-NEXT:    store ptr addrspace(272) [[ADD_PTR]], ptr addrspace(272) [[ADD_PTR1]], align 8, !tbaa [[TBAA7:![0-9]+]]
+; CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP0]], i64 8
+; CHECK-NEXT:    [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP1]], i64 8
+; CHECK-NEXT:    store ptr addrspace(272) [[ADD_PTR2]], ptr addrspace(272) [[ADD_PTR3]], align 8, !tbaa [[TBAA7]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %bar, i64 16
+  %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16
+  store ptr addrspace(272) %add.ptr, ptr addrspace(272) %add.ptr1, align 8, !tbaa !7
+  %add.ptr2 = getelementptr inbounds i8, ptr addrspace(272) %bar, i64 8
+  %add.ptr3 = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 8
+  store ptr addrspace(272) %add.ptr2, ptr addrspace(272) %add.ptr3, align 8, !tbaa !7
+  ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+define dso_local void @separate_ptr_store(ptr addrspace(272) nocapture noundef writeonly %foo, ptr addrspace(272) noundef %bar, ptr addrspace(272) noundef %buz) local_unnamed_addr #1 {
+; CHECK-LABEL: define dso_local void @separate_ptr_store(
+; CHECK-SAME: ptr addrspace(272) nocapture noundef writeonly [[FOO:%.*]], ptr addrspace(272) noundef [[BAR:%.*]], ptr addrspace(272) noundef [[BUZ:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[BUZ]], i32 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[BAR]], i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[FOO]], i32 1)
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP2]], i64 16
+; CHECK-NEXT:    store ptr addrspace(272) [[TMP1]], ptr addrspace(272) [[ADD_PTR]], align 8, !tbaa [[TBAA7]]
+; CHECK-NEXT:    [[ADD_PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(272) [[TMP2]], i64 8
+; CHECK-NEXT:    store ptr addrspace(272) [[TMP0]], ptr addrspace(272) [[ADD_PTR1]], align 8, !tbaa [[TBAA7]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16
+  store ptr addrspace(272) %bar, ptr addrspace(272) %add.ptr, align 8, !tbaa !7
+  %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 8
+  store ptr addrspace(272) %buz, ptr addrspace(272) %add.ptr1, align 8, !tbaa !7
+  ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define dso_local i32 @simple_load(ptr addrspace(272) nocapture noundef readonly %foo) local_unnamed_addr #2 {
+; CHECK-LABEL: define dso_local i32 @simple_load(
+; CHECK-SAME: ptr addrspace(272) nocapture noundef readonly [[FOO:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) [[FOO]], i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(272) [[TMP0]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %0 = load i32, ptr addrspace(272) %foo, align 4, !tbaa !3
+  ret i32 %0
+}
+
+attributes #0 = { nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"some clang version"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"any pointer", !5, i64 0}
diff --git a/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll b/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll
new file mode 100644
index 0000000000000..80ddc3333cf1b
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+define dso_local void @test(ptr addrspace(1) noundef %p) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @test(
+; CHECK-SAME: ptr addrspace(1) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = call ptr addrspace(1) @llvm.bpf.arena.cast.p1.p1(ptr addrspace(1) [[P]], i32 1)
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P1]], i64 8
+; CHECK-NEXT:    [[B3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[A2]], i64 16
+; CHECK-NEXT:    [[C4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[B3]], i64 24
+; CHECK-NEXT:    [[D5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[C4]], i64 32
+; CHECK-NEXT:    store i64 11, ptr addrspace(1) [[C4]], align 8
+; CHECK-NEXT:    store i64 22, ptr addrspace(1) [[D5]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = getelementptr inbounds i8, ptr addrspace(1) %p, i64 8
+  %b = getelementptr inbounds i8, ptr addrspace(1) %a, i64 16
+  %c = getelementptr inbounds i8, ptr addrspace(1) %b, i64 24
+  %d = getelementptr inbounds i8, ptr addrspace(1) %c, i64 32
+  store i64 11, ptr addrspace(1) %c, align 8
+  store i64 22, ptr addrspace(1) %d, align 8
+  ret void
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"some clan version"}
diff --git a/llvm/test/CodeGen/BPF/addr-space-insn.ll b/llvm/test/CodeGen/BPF/addr-space-insn.ll
new file mode 100644
index 0000000000000..362dbad03c698
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-insn.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=bpfel -mcpu=v4 -filetype=asm -show-mc-encoding < %s | FileCheck %s
+
+define dso_local void @test_fn(ptr addrspace(272) noundef %a, ptr addrspace(272) noundef %b) {
+; CHECK-LABEL: test_fn:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    r2 = cast_kern(r2, 272)  # encoding: [0xbf,0x22,0x01,0x00,0x10,0x01,0x00,0x00]
+; CHECK-NEXT:    r1 = cast_user(r1, 272)  # encoding: [0xbf,0x11,0x02,0x00,0x10,0x01,0x00,0x00]
+; CHECK-NEXT:    *(u64 *)(r2 + 0) = r1
+; CHECK-NEXT:    exit
+entry:
+  store volatile ptr addrspace(272) %a, ptr addrspace(272) %b, align 8
+  ret void
+}
+
+declare ptr addrspace(272) @llvm.bpf.arena.cast.p272.p272(ptr addrspace(272) nocapture, i32 immarg)
diff --git a/llvm/test/CodeGen/BPF/addr-space-ku-chain.ll b/llvm/test/CodeGen/BPF/addr-space-ku-chain.ll
new file mode 100644
index 0000000000000..e38c4082f6d4c
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-ku-chain.ll
@@ -0,0 +1,56 @@
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __uptr __attribute__((address_space(272)))
+;
+;   void test(void __uptr *q, void __uptr *p) {
+;     void __uptr * __uptr *a;
+;     void __uptr * __uptr *b;
+;
+;      a = q + 8;
+;     *a = p;
+;      b = p + 16;
+;     *b = a;
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+define dso_local void @test(ptr addrspace(1) noundef %q, ptr addrspace(1) noundef %p) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @test
+; CHECK-SAME:    (ptr addrspace(1) noundef [[Q:%.*]], ptr addrspace(1) noundef [[P:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[QU:%.*]] = call ptr addrspace(1) @llvm.bpf.arena.cast.p1.p1(ptr addrspace(1) [[Q]], i32 2)
+; CHECK-NEXT:    [[PK:%.*]] = call ptr addrspace(1) @llvm.bpf.arena.cast.p1.p1(ptr addrspace(1) [[P]], i32 1)
+; CHECK-NEXT:    [[PU:%.*]] = call ptr addrspace(1) @llvm.bpf.arena.cast.p1.p1(ptr addrspace(1) [[P]], i32 2)
+; CHECK-NEXT:    [[QK:%.*]] = call ptr addrspace(1) @llvm.bpf.arena.cast.p1.p1(ptr addrspace(1) [[Q]], i32 1)
+; CHECK-NEXT:    [[AU:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[QU]], i64 8
+; CHECK-NEXT:    [[AK:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[QK]], i64 8
+; CHECK-NEXT:    store ptr addrspace(1) [[PU]], ptr addrspace(1) [[AK]], align 8
+; CHECK-NEXT:    [[BK:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PK]], i64 16
+; CHECK-NEXT:    store ptr addrspace(1) [[AU]], ptr addrspace(1) [[BK]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(1) %q, i64 8
+  store ptr addrspace(1) %p, ptr addrspace(1) %add.ptr, align 8, !tbaa !3
+  %add.ptr1 = getelementptr inbounds i8, ptr addrspace(1) %p, i64 16
+  store ptr addrspace(1) %add.ptr, ptr addrspace(1) %add.ptr1, align 8, !tbaa !3
+  ret void
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"some clan version"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"any pointer", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/CodeGen/BPF/addr-space-ku-for-same-base.ll b/llvm/test/CodeGen/BPF/addr-space-ku-for-same-base.ll
new file mode 100644
index 0000000000000..60c0b6a8637ad
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-ku-for-same-base.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __uptr __attribute__((address_space(1)))
+;
+;   struct htab;
+;   void __uptr *htab_for_user;
+;   extern void __uptr* bpf_alloc(void);
+;   void test(void) {
+;     long __uptr* p = bpf_alloc();
+;     p[2] = 2;
+;     htab_for_user = &p[2];
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+@htab_for_user = dso_local local_unnamed_addr global ptr addrspace(1) null, align 8
+
+; Function Attrs: nounwind
+define dso_local void @test() local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @test
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr addrspace(1) @bpf_alloc()
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(1) @llvm.bpf.arena.cast.p1.p1(ptr addrspace(1) [[CALL]], i32 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(1) @llvm.bpf.arena.cast.p1.p1(ptr addrspace(1) [[CALL]], i32 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP0]], i64 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 2
+; CHECK-NEXT:    store i64 2, ptr addrspace(1) [[TMP3]], align 8
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP2]], ptr @htab_for_user, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call ptr addrspace(1) @bpf_alloc() #2
+  %arrayidx = getelementptr inbounds i64, ptr addrspace(1) %call, i64 2
+  store i64 2, ptr addrspace(1) %arrayidx, align 8, !tbaa !3
+  store ptr addrspace(1) %arrayidx, ptr @htab_for_user, align 8, !tbaa !7
+  ret void
+}
+
+declare dso_local ptr addrspace(1) @bpf_alloc() local_unnamed_addr #1
+
+attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"some clang version"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"long", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"any pointer", !5, i64 0}
diff --git a/llvm/test/CodeGen/BPF/addr-space-phi.ll b/llvm/test/CodeGen/BPF/addr-space-phi.ll
new file mode 100644
index 0000000000000..1e88bacdc1dd0
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-phi.ll
@@ -0,0 +1,68 @@
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+; Generated from the following C code:
+;   #define __uptr __attribute__((address_space(1)))
+;   extern int __uptr *magic1();
+;   extern int __uptr *magic2();
+;
+;   void test(long i) {
+;     int __uptr *a;
+;
+;     if (i > 42)
+;       a = magic1();
+;     else
+;       a = magic2();
+;     a[5] = 7;
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+; Function Attrs: nounwind
+define dso_local void @test(i64 noundef %i) local_unnamed_addr #0 {
+; CHECK-NOT:   @llvm.bpf.arena.cast
+; CHECK:       if.end:
+; CHECK-NEXT:    [[A_0:%.*]] = phi
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(1) @llvm.bpf.arena.cast.p1.p1(ptr addrspace(1) [[A_0]], i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i64 5
+; CHECK-NEXT:    store i32 7, ptr addrspace(1) [[TMP1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp = icmp sgt i64 %i, 42
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call = tail call ptr addrspace(1) @magic1() #2
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %call1 = tail call ptr addrspace(1) @magic2() #2
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %a.0 = phi ptr addrspace(1) [ %call, %if.then ], [ %call1, %if.else ]
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a.0, i64 5
+  store i32 7, ptr addrspace(1) %arrayidx, align 4, !tbaa !3
+  ret void
+}
+
+declare dso_local ptr addrspace(1) @magic1(...) local_unnamed_addr #1
+
+declare dso_local ptr addrspace(1) @magic2(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"some clang version"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}

>From 5b1d0ce07e5bf09e2b21f6564c3bd864e29da0a4 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87 at gmail.com>
Date: Tue, 30 Jan 2024 22:20:43 +0200
Subject: [PATCH 2/3] [BPF] merge global variables with address space to
 .arena.N section

Make it so that all globals within same address space reside in
section with name ".arena.N", where N is number of the address space.

E.g. for the following C program:

```c
__as const char a[2] = {1,2};
__as char b[2] = {3,4};
__as char c[2];
...
```

Generate the following layout:

```
$ clang -O2 --target=bpf t.c -c -o - \
  | llvm-readelf --sections --symbols -
...
Section Headers:
  [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
  ...
  [ 4] .arena.272        PROGBITS        0000000000000000 0000e8 000018 00  WA  0   0  4
  ...

Symbol table '.symtab' contains 8 entries:
   Num:    Value          Size Type    Bind   Vis       Ndx Name
     ...
     3: 0000000000000000     8 OBJECT  GLOBAL DEFAULT     4 a
     4: 0000000000000008     8 OBJECT  GLOBAL DEFAULT     4 b
     5: 0000000000000010     8 OBJECT  GLOBAL DEFAULT     4 c
     ...                                                 ^^^
                                                  Note section index
```
---
 llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp  | 12 ++++++++
 llvm/test/CodeGen/BPF/addr-space-globals.ll  | 30 ++++++++++++++++++++
 llvm/test/CodeGen/BPF/addr-space-globals2.ll | 25 ++++++++++++++++
 3 files changed, 67 insertions(+)
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-globals.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-globals2.ll

diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index a31c9045c0d0b..05e3eefb114d7 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -499,6 +499,18 @@ bool BPFCheckAndAdjustIR::insertASpaceBuiltins(Module &M) {
     }
     Changed |= !ToKernelCache.empty() || !ToUserCache.empty();
   }
+  // Merge all globals within same address space into single
+  // .arena.<addr space no> section
+  for (GlobalVariable &G : M.globals()) {
+    if (G.getAddressSpace() == 0 || G.hasSection())
+      continue;
+    SmallString<16> SecName;
+    raw_svector_ostream OS(SecName);
+    OS << ".arena." << G.getAddressSpace();
+    G.setSection(SecName);
+    // Prevent having separate section for constants
+    G.setConstant(false);
+  }
   return Changed;
 }
 
diff --git a/llvm/test/CodeGen/BPF/addr-space-globals.ll b/llvm/test/CodeGen/BPF/addr-space-globals.ll
new file mode 100644
index 0000000000000..878ba0dfce6cd
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-globals.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __as __attribute__((address_space(272)))
+;   __as const char a[2] = {1,2};
+;   __as char b[2] = {3,4};
+;   __as char c[2];
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+
+@a = dso_local local_unnamed_addr addrspace(272) constant [2 x i8] [i8 1, i8 2], align 1
+@b = dso_local local_unnamed_addr addrspace(272) global [2 x i8] [i8 3, i8 4], align 1
+@c = dso_local local_unnamed_addr addrspace(272) global [2 x i8] zeroinitializer, align 1
+
+; Verify that a,b,c reside in the same section
+
+; CHECK:     .section .arena.272,"aw",@progbits
+; CHECK-NOT: .section
+; CHECK:     .globl  a
+; CHECK:     .ascii  "\001\002"
+; CHECK-NOT: .section
+; CHECK:     .globl  b
+; CHECK:     .ascii  "\003\004"
+; CHECK-NOT: .section
+; CHECK:     .globl  c
+; CHECK:     .zero   2
diff --git a/llvm/test/CodeGen/BPF/addr-space-globals2.ll b/llvm/test/CodeGen/BPF/addr-space-globals2.ll
new file mode 100644
index 0000000000000..d1e2318948751
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-globals2.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+;   __attribute__((address_space(1))) char a[2] = {1,2};
+;   __attribute__((address_space(2))) char b[2] = {3,4};
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+@a = dso_local local_unnamed_addr addrspace(1) global [2 x i8] [i8 1, i8 2], align 1
+@b = dso_local local_unnamed_addr addrspace(2) global [2 x i8] [i8 3, i8 4], align 1
+
+; Verify that a,b reside in separate sections
+
+; CHECK:     .section .arena.1,"aw",@progbits
+; CHECK-NOT: .section
+; CHECK:     .globl  a
+; CHECK:     .ascii  "\001\002"
+
+; CHECK:     .section .arena.2,"aw",@progbits
+; CHECK-NOT: .section
+; CHECK:     .globl  b
+; CHECK:     .ascii  "\003\004"

>From bd30d9ebbe863384f5b7977c053fcf8683b6008c Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87 at gmail.com>
Date: Tue, 30 Jan 2024 04:11:36 +0200
Subject: [PATCH 3/3] [BPF][CLANG] Front-end support for
 __builtin_bpf_arena_cast

Add support for `__builtin_bpf_arena_cast()` function and
`__BPF_FEATURE_ARENA_CAST` macro.

`__builtin_bpf_arena_cast()` takes two parameters:
- a pointer
- arena cast direction:
  - 1 for `cast_kern` instruction
  - 2 for `cast_user` instruction

And returns the pointer with the same type and address space.
For each call to `__builtin_bpf_arena_cast()` BPF backend
generates `cast_kern` or `cast_user` instruction.

`__BPF_FEATURE_ARENA_CAST` macro is defined if compiler supports
emission of `cast_kern` and `cast_user` instructions.
---
 clang/include/clang/Basic/BuiltinsBPF.td      |  7 ++++
 .../clang/Basic/DiagnosticSemaKinds.td        |  6 ++++
 clang/lib/Basic/Targets/BPF.cpp               |  2 ++
 clang/lib/CodeGen/CGBuiltin.cpp               | 12 ++++++-
 clang/lib/Sema/SemaChecking.cpp               | 33 ++++++++++++++-----
 .../test/CodeGen/builtins-bpf-arena-cast-pp.c | 12 +++++++
 clang/test/CodeGen/builtins-bpf-arena-cast.c  | 15 +++++++++
 .../test/Preprocessor/bpf-predefined-macros.c |  7 ++++
 clang/test/Sema/builtins-bpf.c                | 18 ++++++++++
 9 files changed, 102 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/CodeGen/builtins-bpf-arena-cast-pp.c
 create mode 100644 clang/test/CodeGen/builtins-bpf-arena-cast.c

diff --git a/clang/include/clang/Basic/BuiltinsBPF.td b/clang/include/clang/Basic/BuiltinsBPF.td
index 169d05c870998..a20f8388bb787 100644
--- a/clang/include/clang/Basic/BuiltinsBPF.td
+++ b/clang/include/clang/Basic/BuiltinsBPF.td
@@ -35,3 +35,10 @@ def PreserveEnumValue : TargetBuiltin {
   let Attributes = [CustomTypeChecking];
   let Prototype = "long int(...)";
 }
+
+// Generating cast_user / cast_kern BPF instructions
+def ArenaCast : TargetBuiltin {
+  let Spellings = ["__builtin_bpf_arena_cast"];
+  let Attributes = [CustomTypeChecking, Const, Pure];
+  let Prototype = "void *(void *, int)";
+}
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 24d32cb87c89e..d6488d1f5118f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12004,6 +12004,12 @@ def err_preserve_enum_value_invalid : Error<
   "__builtin_preserve_enum_value argument %0 invalid">;
 def err_preserve_enum_value_not_const: Error<
   "__builtin_preserve_enum_value argument %0 not a constant">;
+def err_bpf_arena_cast_not_const: Error<
+  "__builtin_bpf_arena_cast argument %0 not a constant">;
+def err_bpf_arena_cast_not_pointer: Error<
+  "__builtin_bpf_arena_cast argument %0 should be a pointer">;
+def err_bpf_arena_cast_wrong_value: Error<
+  "__builtin_bpf_arena_cast argument %0 should be equal to 1 or 2">;
 
 def err_bit_cast_non_trivially_copyable : Error<
   "__builtin_bit_cast %select{source|destination}0 type must be trivially copyable">;
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index e3fbbb720d069..5d5a1550b3244 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -60,6 +60,8 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__BPF_FEATURE_GOTOL");
     Builder.defineMacro("__BPF_FEATURE_ST");
   }
+
+  Builder.defineMacro("__BPF_FEATURE_ARENA_CAST");
 }
 
 static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f3ab5ad7b08ec..818543c79765c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13207,7 +13207,8 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
   assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
           BuiltinID == BPF::BI__builtin_btf_type_id ||
           BuiltinID == BPF::BI__builtin_preserve_type_info ||
-          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
+          BuiltinID == BPF::BI__builtin_preserve_enum_value ||
+          BuiltinID == BPF::BI__builtin_bpf_arena_cast) &&
          "unexpected BPF builtin");
 
   // A sequence number, injected into IR builtin functions, to
@@ -13307,6 +13308,15 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
     Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
     return Fn;
   }
+  case BPF::BI__builtin_bpf_arena_cast: {
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Direction = EmitScalarExpr(E->getArg(1));
+    llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
+        &CGM.getModule(), llvm::Intrinsic::bpf_arena_cast,
+        {Ptr->getType(), Ptr->getType()});
+    CallInst *Fn = Builder.CreateCall(IntrinsicFn, {Ptr, Direction});
+    return Fn;
+  }
   }
 }
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 502b24bcdf8b4..f279dbfbfd897 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3834,7 +3834,8 @@ bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID,
   assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
           BuiltinID == BPF::BI__builtin_btf_type_id ||
           BuiltinID == BPF::BI__builtin_preserve_type_info ||
-          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
+          BuiltinID == BPF::BI__builtin_preserve_enum_value ||
+          BuiltinID == BPF::BI__builtin_bpf_arena_cast) &&
          "unexpected BPF builtin");
 
   if (checkArgCount(*this, TheCall, 2))
@@ -3851,16 +3852,27 @@ bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID,
       kind = diag::err_btf_type_id_not_const;
     else if (BuiltinID == BPF::BI__builtin_preserve_type_info)
       kind = diag::err_preserve_type_info_not_const;
-    else
+    else if (BuiltinID == BPF::BI__builtin_preserve_enum_value)
       kind = diag::err_preserve_enum_value_not_const;
+    else if (BuiltinID == BPF::BI__builtin_bpf_arena_cast)
+      kind = diag::err_bpf_arena_cast_not_const;
+    else
+      llvm_unreachable("unexpected BuiltinID");
     Diag(Arg->getBeginLoc(), kind) << 2 << Arg->getSourceRange();
     return true;
   }
 
+  if (BuiltinID == BPF::BI__builtin_bpf_arena_cast && Value != 1 &&
+      Value != 2) {
+    Diag(Arg->getBeginLoc(), diag::err_bpf_arena_cast_wrong_value)
+        << 2 << Arg->getSourceRange();
+    return true;
+  }
+
   // The first argument
   Arg = TheCall->getArg(0);
   bool InvalidArg = false;
-  bool ReturnUnsignedInt = true;
+  QualType ReturnType = Context.UnsignedIntTy;
   if (BuiltinID == BPF::BI__builtin_preserve_field_info) {
     if (!isValidBPFPreserveFieldInfoArg(Arg)) {
       InvalidArg = true;
@@ -3876,9 +3888,15 @@ bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID,
       InvalidArg = true;
       kind = diag::err_preserve_enum_value_invalid;
     }
-    ReturnUnsignedInt = false;
+    ReturnType = Context.UnsignedLongTy;
   } else if (BuiltinID == BPF::BI__builtin_btf_type_id) {
-    ReturnUnsignedInt = false;
+    ReturnType = Context.UnsignedLongTy;
+  } else if (BuiltinID == BPF::BI__builtin_bpf_arena_cast) {
+    if (!Arg->getType()->isPointerType()) {
+      InvalidArg = true;
+      kind = diag::err_bpf_arena_cast_not_pointer;
+    }
+    ReturnType = Arg->getType();
   }
 
   if (InvalidArg) {
@@ -3886,10 +3904,7 @@ bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID,
     return true;
   }
 
-  if (ReturnUnsignedInt)
-    TheCall->setType(Context.UnsignedIntTy);
-  else
-    TheCall->setType(Context.UnsignedLongTy);
+  TheCall->setType(ReturnType);
   return false;
 }
 
diff --git a/clang/test/CodeGen/builtins-bpf-arena-cast-pp.c b/clang/test/CodeGen/builtins-bpf-arena-cast-pp.c
new file mode 100644
index 0000000000000..d3f6028056a22
--- /dev/null
+++ b/clang/test/CodeGen/builtins-bpf-arena-cast-pp.c
@@ -0,0 +1,12 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+
+#if !__has_builtin(__builtin_bpf_arena_cast)
+#error "no __builtin_bpf_arena_cast builtin"
+#endif
+
+void test(void) {}
+
+// CHECK-LABEL: define {{.*}} @test()
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret void
diff --git a/clang/test/CodeGen/builtins-bpf-arena-cast.c b/clang/test/CodeGen/builtins-bpf-arena-cast.c
new file mode 100644
index 0000000000000..d47e0b8168f21
--- /dev/null
+++ b/clang/test/CodeGen/builtins-bpf-arena-cast.c
@@ -0,0 +1,15 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+
+#define __as __attribute__((address_space(7)))
+
+void __as *cast_kern(void __as *ptr) {
+  return __builtin_bpf_arena_cast(ptr, 1);
+}
+
+void __as *cast_user(void __as *ptr) {
+  return __builtin_bpf_arena_cast(ptr, 2);
+}
+
+// CHECK: call ptr addrspace(7) @llvm.bpf.arena.cast.p7.p7(ptr addrspace(7) %{{.*}}, i32 1)
+// CHECK: call ptr addrspace(7) @llvm.bpf.arena.cast.p7.p7(ptr addrspace(7) %{{.*}}, i32 2)
diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index ff4d00ac3bcfc..cdcf2b2cc7f0b 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -61,6 +61,9 @@ int r;
 #ifdef __BPF_FEATURE_ST
 int s;
 #endif
+#ifdef __BPF_FEATURE_ARENA_CAST
+int t;
+#endif
 
 // CHECK: int b;
 // CHECK: int c;
@@ -90,6 +93,10 @@ int s;
 // CPU_V4: int r;
 // CPU_V4: int s;
 
+// CPU_V2: int t;
+// CPU_V3: int t;
+// CPU_V4: int t;
+
 // CPU_GENERIC: int g;
 
 // CPU_PROBE: int f;
diff --git a/clang/test/Sema/builtins-bpf.c b/clang/test/Sema/builtins-bpf.c
index fc540260c91c3..4327e63c36a6c 100644
--- a/clang/test/Sema/builtins-bpf.c
+++ b/clang/test/Sema/builtins-bpf.c
@@ -102,3 +102,21 @@ unsigned invalid17(void) {
 unsigned invalid18(struct s *arg) {
   return __builtin_preserve_type_info(arg->a + 2, 0); // expected-error {{__builtin_preserve_type_info argument 1 invalid}}
 }
+
+#define __as __attribute__((address_space(7)))
+
+void __as *valid16(void __as *ptr) {
+  return __builtin_bpf_arena_cast(ptr, 1);
+}
+
+void __as *invalid19(void __as *ptr, int i) {
+  return __builtin_bpf_arena_cast(ptr, i); // expected-error {{__builtin_bpf_arena_cast argument 2 not a constant}}
+}
+
+void __as *invalid20(int i) {
+  return __builtin_bpf_arena_cast(i, 1); // expected-error {{__builtin_bpf_arena_cast argument 1 should be a pointer}}
+}
+
+void __as *invalid21(void __as *ptr) {
+  return __builtin_bpf_arena_cast(ptr, 3); // expected-error {{__builtin_bpf_arena_cast argument 2 should be equal to 1 or 2}}
+}



More information about the cfe-commits mailing list