[llvm] r365352 - [BPF] add new intrinsics preserve_{array, union, struct}_access_index

Yonghong Song via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 8 10:08:28 PDT 2019


Author: yhs
Date: Mon Jul  8 10:08:28 2019
New Revision: 365352

URL: http://llvm.org/viewvc/llvm-project?rev=365352&view=rev
Log:
[BPF] add new intrinsics preserve_{array,union,struct}_access_index

For background of BPF CO-RE project, please refer to
  http://vger.kernel.org/bpfconf2019.html
In summary, BPF CO-RE intends to compile bpf programs
adjustable on struct/union layout change so the same
program can run on multiple kernels with adjustment
before loading based on native kernel structures.

In order to do this, we need keep track of GEP(getelementptr)
instruction base and result debuginfo types, so we
can adjust on the host based on kernel BTF info.
Capturing such information as an IR optimization is hard
as various optimization may have tweaked GEP and also
union is replaced by structure it is impossible to track
fieldindex for union member accesses.

Three intrinsic functions, preserve_{array,union,struct}_access_index,
are introducted.
  addr = preserve_array_access_index(base, index, dimension)
  addr = preserve_union_access_index(base, di_index)
  addr = preserve_struct_access_index(base, gep_index, di_index)
here,
  base: the base pointer for the array/union/struct access.
  index: the last access index for array, the same for IR/DebugInfo layout.
  dimension: the array dimension.
  gep_index: the access index based on IR layout.
  di_index: the access index based on user/debuginfo types.

For example, for the following example,
  $ cat test.c
  struct sk_buff {
     int i;
     int b1:1;
     int b2:2;
     union {
       struct {
         int o1;
         int o2;
       } o;
       struct {
         char flags;
         char dev_id;
       } dev;
       int netid;
     } u[10];
  };

  static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr)
      = (void *) 4;

  #define _(x) (__builtin_preserve_access_index(x))

  int bpf_prog(struct sk_buff *ctx) {
    char dev_id;
    bpf_probe_read(&dev_id, sizeof(char), _(&ctx->u[5].dev.dev_id));
    return dev_id;
  }
  $ clang -target bpf -O2 -g -emit-llvm -S -mllvm -print-before-all \
    test.c >& log

The generated IR looks like below:

  ...
  define dso_local i32 @bpf_prog(%struct.sk_buff*) #0 !dbg !15 {
    %2 = alloca %struct.sk_buff*, align 8
    %3 = alloca i8, align 1
    store %struct.sk_buff* %0, %struct.sk_buff** %2, align 8, !tbaa !45
    call void @llvm.dbg.declare(metadata %struct.sk_buff** %2, metadata !43, metadata !DIExpression()), !dbg !49
    call void @llvm.lifetime.start.p0i8(i64 1, i8* %3) #4, !dbg !50
    call void @llvm.dbg.declare(metadata i8* %3, metadata !44, metadata !DIExpression()), !dbg !51
    %4 = load i32 (i8*, i32, i8*)*, i32 (i8*, i32, i8*)** @bpf_probe_read, align 8, !dbg !52, !tbaa !45
    %5 = load %struct.sk_buff*, %struct.sk_buff** %2, align 8, !dbg !53, !tbaa !45
    %6 = call [10 x %union.anon]* @llvm.preserve.struct.access.index.p0a10s_union.anons.p0s_struct.sk_buffs(
         %struct.sk_buff* %5, i32 2, i32 3), !dbg !53, !llvm.preserve.access.index !19
    %7 = call %union.anon* @llvm.preserve.array.access.index.p0s_union.anons.p0a10s_union.anons(
         [10 x %union.anon]* %6, i32 1, i32 5), !dbg !53
    %8 = call %union.anon* @llvm.preserve.union.access.index.p0s_union.anons.p0s_union.anons(
         %union.anon* %7, i32 1), !dbg !53, !llvm.preserve.access.index !26
    %9 = bitcast %union.anon* %8 to %struct.anon.0*, !dbg !53
    %10 = call i8* @llvm.preserve.struct.access.index.p0i8.p0s_struct.anon.0s(
         %struct.anon.0* %9, i32 1, i32 1), !dbg !53, !llvm.preserve.access.index !34
    %11 = call i32 %4(i8* %3, i32 1, i8* %10), !dbg !52
    %12 = load i8, i8* %3, align 1, !dbg !54, !tbaa !55
    %13 = sext i8 %12 to i32, !dbg !54
    call void @llvm.lifetime.end.p0i8(i64 1, i8* %3) #4, !dbg !56
    ret i32 %13, !dbg !57
  }

  !19 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "sk_buff", file: !3, line: 1, size: 704, elements: !20)
  !26 = distinct !DICompositeType(tag: DW_TAG_union_type, scope: !19, file: !3, line: 5, size: 64, elements: !27)
  !34 = distinct !DICompositeType(tag: DW_TAG_structure_type, scope: !26, file: !3, line: 10, size: 16, elements: !35)

Note that @llvm.preserve.{struct,union}.access.index calls have metadata llvm.preserve.access.index
attached to instructions to provide struct/union debuginfo type information.

For &ctx->u[5].dev.dev_id,
  . The "%6 = ..." represents struct member "u" with index 2 for IR layout and index 3 for DI layout.
  . The "%7 = ..." represents array subscript "5".
  . The "%8 = ..." represents union member "dev" with index 1 for DI layout.
  . The "%10 = ..." represents struct member "dev_id" with index 1 for both IR and DI layout.

Basically, traversing the use-def chain recursively for the 3rd argument of bpf_probe_read() and
examining all preserve_*_access_index calls, the debuginfo struct/union/array access index
can be achieved.

The intrinsics also contain enough information to regenerate codes for IR layout.
For array and structure intrinsics, the proper GEP can be constructed.
For union intrinsics, replacing all uses of "addr" with "base" should be enough.

Signed-off-by: Yonghong Song <yhs at fb.com>

Differential Revision: https://reviews.llvm.org/D61810

Modified:
    llvm/trunk/docs/LangRef.rst
    llvm/trunk/include/llvm/IR/IRBuilder.h
    llvm/trunk/include/llvm/IR/Intrinsics.td
    llvm/trunk/include/llvm/IR/LLVMContext.h
    llvm/trunk/lib/IR/LLVMContext.cpp

Modified: llvm/trunk/docs/LangRef.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/LangRef.rst?rev=365352&r1=365351&r2=365352&view=diff
==============================================================================
--- llvm/trunk/docs/LangRef.rst (original)
+++ llvm/trunk/docs/LangRef.rst Mon Jul  8 10:08:28 2019
@@ -17304,3 +17304,106 @@ Lowering:
 """""""""
 
 Lowers to a call to `objc_storeWeak <https://clang.llvm.org/docs/AutomaticReferenceCounting.html#arc-runtime-objc-storeweak>`_.
+
+Preserving Debug Information Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These intrinsics are used to carry certain debuginfo together with
+IR-level operations. For example, it may be desirable to
+know the structure/union name and the original user-level field
+indices. Such information got lost in IR GetElementPtr instruction
+since the IR types are different from debugInfo types and unions
+are converted to structs in IR.
+
+'``llvm.preserve.array.access.index``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+::
+
+      declare <type2>
+      @llvm.preserve.array.access.index.p0s_union.anons.p0a10s_union.anons(<type> base,
+                                                                           i32 dim,
+                                                                           i32 index)
+
+Overview:
+"""""""""
+
+The '``llvm.preserve.array.access.index``' intrinsic returns the getelementptr address
+based on array base ``base``, array dimension ``dim`` and the last access index ``index``
+into the array.
+
+Arguments:
+""""""""""
+
+The ``base`` is the array base address.  The ``dim`` is the array dimension.
+The ``base`` is a pointer if ``dim`` equals 0.
+The ``index`` is the last access index into the array or pointer.
+
+Semantics:
+""""""""""
+
+The '``llvm.preserve.array.access.index``' intrinsic produces the same result
+as a getelementptr with base ``base`` and access operands ``{dim's 0's, index}``.
+
+'``llvm.preserve.union.access.index``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+::
+
+      declare <type>
+      @llvm.preserve.union.access.index.p0s_union.anons.p0s_union.anons(<type> base,
+                                                                        i32 di_index)
+
+Overview:
+"""""""""
+
+The '``llvm.preserve.union.access.index``' intrinsic carries the debuginfo field index
+``di_index`` and returns the ``base`` address.
+The ``llvm.preserve.access.index`` type of metadata is attached to this call instruction
+to provide union debuginfo type.
+
+Arguments:
+""""""""""
+
+The ``base`` is the union base address. The ``di_index`` is the field index in debuginfo.
+
+Semantics:
+""""""""""
+
+The '``llvm.preserve.union.access.index``' intrinsic returns the ``base`` address.
+
+'``llvm.preserve.struct.access.index``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+::
+
+      declare <type2>
+      @llvm.preserve.struct.access.index.p0i8.p0s_struct.anon.0s(<type> base,
+                                                                 i32 gep_index,
+                                                                 i32 di_index)
+
+Overview:
+"""""""""
+
+The '``llvm.preserve.struct.access.index``' intrinsic returns the getelementptr address
+based on struct base ``base`` and IR struct member index ``gep_index``.
+The ``llvm.preserve.access.index`` type of metadata is attached to this call instruction
+to provide struct debuginfo type.
+
+Arguments:
+""""""""""
+
+The ``base`` is the structure base address. The ``gep_index`` is the struct member index
+based on IR structures. The ``di_index`` is the struct member index based on debuginfo.
+
+Semantics:
+""""""""""
+
+The '``llvm.preserve.struct.access.index``' intrinsic produces the same result
+as a getelementptr with base ``base`` and access operands ``{0, gep_index}``.

Modified: llvm/trunk/include/llvm/IR/IRBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IRBuilder.h?rev=365352&r1=365351&r2=365352&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IRBuilder.h (original)
+++ llvm/trunk/include/llvm/IR/IRBuilder.h Mon Jul  8 10:08:28 2019
@@ -2453,6 +2453,74 @@ public:
     return V;
   }
 
+  Value *CreatePreserveArrayAccessIndex(Value *Base, unsigned Dimension,
+                                        unsigned LastIndex) {
+    assert(isa<PointerType>(Base->getType()) &&
+           "Invalid Base ptr type for preserve.array.access.index.");
+    auto *BaseType = Base->getType();
+
+    Value *LastIndexV = getInt32(LastIndex);
+    Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+    SmallVector<Value *, 4> IdxList;
+    for (unsigned I = 0; I < Dimension; ++I)
+      IdxList.push_back(Zero);
+    IdxList.push_back(LastIndexV);
+
+    Type *ResultType =
+        GetElementPtrInst::getGEPReturnType(Base, IdxList);
+
+    Module *M = BB->getParent()->getParent();
+    Function *FnPreserveArrayAccessIndex = Intrinsic::getDeclaration(
+        M, Intrinsic::preserve_array_access_index, {ResultType, BaseType});
+
+    Value *DimV = getInt32(Dimension);
+    CallInst *Fn =
+        CreateCall(FnPreserveArrayAccessIndex, {Base, DimV, LastIndexV});
+
+    return Fn;
+  }
+
+  Value *CreatePreserveUnionAccessIndex(Value *Base, unsigned FieldIndex,
+                                        MDNode *DbgInfo) {
+    assert(isa<PointerType>(Base->getType()) &&
+           "Invalid Base ptr type for preserve.union.access.index.");
+    auto *BaseType = Base->getType();
+
+    Module *M = BB->getParent()->getParent();
+    Function *FnPreserveUnionAccessIndex = Intrinsic::getDeclaration(
+        M, Intrinsic::preserve_union_access_index, {BaseType, BaseType});
+
+    Value *DIIndex = getInt32(FieldIndex);
+    CallInst *Fn =
+        CreateCall(FnPreserveUnionAccessIndex, {Base, DIIndex});
+    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+
+    return Fn;
+  }
+
+  Value *CreatePreserveStructAccessIndex(Value *Base, unsigned Index,
+                                         unsigned FieldIndex, MDNode *DbgInfo) {
+    assert(isa<PointerType>(Base->getType()) &&
+           "Invalid Base ptr type for preserve.struct.access.index.");
+    auto *BaseType = Base->getType();
+
+    Value *GEPIndex = getInt32(Index);
+    Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+    Type *ResultType =
+        GetElementPtrInst::getGEPReturnType(Base, {Zero, GEPIndex});
+
+    Module *M = BB->getParent()->getParent();
+    Function *FnPreserveStructAccessIndex = Intrinsic::getDeclaration(
+        M, Intrinsic::preserve_struct_access_index, {ResultType, BaseType});
+
+    Value *DIIndex = getInt32(FieldIndex);
+    CallInst *Fn = CreateCall(FnPreserveStructAccessIndex,
+                              {Base, GEPIndex, DIIndex});
+    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+
+    return Fn;
+  }
+
 private:
   /// Helper function that creates an assume intrinsic call that
   /// represents an alignment assumption on the provided Ptr, Mask, Type

Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=365352&r1=365351&r2=365352&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
+++ llvm/trunk/include/llvm/IR/Intrinsics.td Mon Jul  8 10:08:28 2019
@@ -1040,7 +1040,6 @@ def int_clear_cache : Intrinsic<[], [llv
 // Intrinsic to detect whether its argument is a constant.
 def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">;
 
-
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
 def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
@@ -1214,6 +1213,22 @@ def int_loop_decrement_reg :
 
 def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],
                              [IntrNoMem, Returned<0>]>;
+
+//===------- Intrinsics that are used to preserve debug information -------===//
+
+def int_preserve_array_access_index : Intrinsic<[llvm_anyptr_ty],
+                                                [llvm_anyptr_ty, llvm_i32_ty,
+                                                 llvm_i32_ty],
+                                                [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
+def int_preserve_union_access_index : Intrinsic<[llvm_anyptr_ty],
+                                                [llvm_anyptr_ty, llvm_i32_ty],
+                                                [IntrNoMem, ImmArg<1>]>;
+def int_preserve_struct_access_index : Intrinsic<[llvm_anyptr_ty],
+                                                 [llvm_anyptr_ty, llvm_i32_ty,
+                                                  llvm_i32_ty],
+                                                 [IntrNoMem, ImmArg<1>,
+                                                  ImmArg<2>]>;
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/include/llvm/IR/LLVMContext.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/LLVMContext.h?rev=365352&r1=365351&r2=365352&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/LLVMContext.h (original)
+++ llvm/trunk/include/llvm/IR/LLVMContext.h Mon Jul  8 10:08:28 2019
@@ -99,6 +99,7 @@ public:
     MD_irr_loop = 24,                 // "irr_loop"
     MD_access_group = 25,             // "llvm.access.group"
     MD_callback = 26,                 // "callback"
+    MD_preserve_access_index = 27,    // "llvm.preserve.*.access.index"
   };
 
   /// Known operand bundle tag IDs, which always have the same value.  All

Modified: llvm/trunk/lib/IR/LLVMContext.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/LLVMContext.cpp?rev=365352&r1=365351&r2=365352&view=diff
==============================================================================
--- llvm/trunk/lib/IR/LLVMContext.cpp (original)
+++ llvm/trunk/lib/IR/LLVMContext.cpp Mon Jul  8 10:08:28 2019
@@ -63,6 +63,7 @@ LLVMContext::LLVMContext() : pImpl(new L
     {MD_irr_loop, "irr_loop"},
     {MD_access_group, "llvm.access.group"},
     {MD_callback, "callback"},
+    {MD_preserve_access_index, "llvm.preserve.access.index"},
   };
 
   for (auto &MDKind : MDKinds) {




More information about the llvm-commits mailing list