[PATCH] D61809: [BPF] Preserve original struct/union type name/access index and array subscripts

Yonghong Song via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Fri May 10 15:36:41 PDT 2019


yonghong-song created this revision.
yonghong-song added reviewers: eli.friedman, ast.
Herald added subscribers: cfe-commits, arphaman, kosarev.
Herald added a project: clang.

For background on the BPF CO-RE project, please refer to

  http://vger.kernel.org/bpfconf2019.html

In summary, BPF CO-RE intends to compile bpf programs
that are adjustable to struct/union layout changes, so the same
program can run on multiple kernels, with adjustment
before loading based on the native kernel structures.

In order to do this, we need to keep track of the GEP (getelementptr)
instruction base and result debuginfo types, so we
can adjust on the host based on kernel BTF info.
Capturing such information in an IR optimization pass is hard,
as various optimizations may have tweaked the GEP. Also, since a
union is replaced by a structure in the IR, it is impossible to track
the field index for union member accesses.

An intrinsic function, preserve_di_access_index, is introduced.

  naddr = preserve_di_access_index(addr, base, type_name, access_index)

here,

  addr: the previous getelementptr result, used as the result value so
        program semantics are kept intact.
  base: the base of previous getelementptr, used for later
        code generation with new relocatable access offset.
  type_name: the struct/union type name if available, can be used to
        match corresponding types in debuginfo.
  access_index: the access index based on user/debuginfo types.
  naddr: the result, having the same type as "addr".

For example, consider the following program:

  $ cat test.c
  struct sk_buff {
    int i;
    int b1:1;
    int b2:2;
    union {
      struct {
        int o1;
        int o2;
      } o;
      struct {
        char flags;
        char dev_id;
      } dev;
      int netid;
    } u[10];
  };
  
  static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr)
      = (void *) 4;
  
  int bpf_prog(struct sk_buff *ctx) {
    char dev_id;
    bpf_probe_read(&dev_id, sizeof(char), &ctx->u[5].dev.dev_id);
    return dev_id;
  }
  
  $ clang -target bpf -O2 -g -emit-llvm -S -mllvm -print-before-all test.c >& log

The generated IR looks like below:

  ...

define dso_local i32 @bpf_prog(%struct.sk_buff*) #0 !dbg !15 {

  %2 = alloca %struct.sk_buff*, align 8
  %3 = alloca i8, align 1
  store %struct.sk_buff* %0, %struct.sk_buff** %2, align 8, !tbaa !45
  call void @llvm.dbg.declare(metadata %struct.sk_buff** %2, metadata !43, metadata !DIExpression()), !dbg !49
  call void @llvm.lifetime.start.p0i8(i64 1, i8* %3) #4, !dbg !50
  call void @llvm.dbg.declare(metadata i8* %3, metadata !44, metadata !DIExpression()), !dbg !51
  %4 = load i32 (i8*, i32, i8*)*, i32 (i8*, i32, i8*)** @bpf_probe_read, align 8, !dbg !52, !tbaa !45
  %5 = load %struct.sk_buff*, %struct.sk_buff** %2, align 8, !dbg !53, !tbaa !45
  %6 = getelementptr inbounds %struct.sk_buff, %struct.sk_buff* %5, i32 0, i32 2, !dbg !54
  %7 = call [10 x %union.anon]*
       @llvm.preserve.di.access.index.p0a10s_union.anons.p0a10s_union.anons.p0s_struct.sk_buffs(
       [10 x %union.anon]* %6, %struct.sk_buff* %5,
       i8* getelementptr inbounds ([8 x i8], [8 x i8]* @0, i32 0, i32 0), i32 3), !dbg !54
  %8 = getelementptr inbounds [10 x %union.anon], [10 x %union.anon]* %7, i64 0, i64 5, !dbg !53
  %9 = call %union.anon* @llvm.preserve.di.access.index.p0s_union.anons.p0s_union.anons.p0a10s_union.anons(
       %union.anon* %8, [10 x %union.anon]* %7,
       i8* getelementptr inbounds ([1 x i8], [1 x i8]* @1, i32 0, i32 0), i32 5), !dbg !53
  %10 = call %union.anon* @llvm.preserve.di.access.index.p0s_union.anons.p0s_union.anons.p0s_union.anons(
        %union.anon* %9, %union.anon* %9, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @2, i32 0, i32 0), i32 1), !dbg !55
  %11 = bitcast %union.anon* %10 to %struct.anon.0*, !dbg !55
  %12 = getelementptr inbounds %struct.anon.0, %struct.anon.0* %11, i32 0, i32 1, !dbg !56
  %13 = call i8* @llvm.preserve.di.access.index.p0i8.p0i8.p0s_struct.anon.0s(i8* %12, %struct.anon.0* %11,
        i8* getelementptr inbounds ([1 x i8], [1 x i8]* @3, i32 0, i32 0), i32 1), !dbg !56
  %14 = call i32 %4(i8* %3, i32 1, i8* %13), !dbg !52
  %15 = load i8, i8* %3, align 1, !dbg !57, !tbaa !58
  %16 = sext i8 %15 to i32, !dbg !57
  call void @llvm.lifetime.end.p0i8(i64 1, i8* %3) #4, !dbg !59
  ret i32 %16, !dbg !60

}

For &ctx->u[5].dev.dev_id,

  . The first getelementptr (%6 = ...) has index 2 based on IR layout, and subsequent
    preserve_di_access_index (%7 = ...) has index 3 which reflects the debuginfo type layout.
  . The second getelementptr (%8 = ...) has index 5 which is the same as preserve_di_access_index
    (%9 = ...) for array subscript.
  . The instruction "%10 ..." is a call to preserve_di_access_index, which encodes the union member
    access index "1". Such information is lost in the original IR.
  . The third getelementptr (%12 = ...) has index 1 for the anonymous struct member "dev_id". The
    subsequent preserve_di_access_index also has the index "1".

Basically, by traversing the use-def chain recursively for the 3rd argument of bpf_probe_read() and
examining all preserve_di_access_index calls, the original struct/union/array access indices
can be recovered.

The original type name is also kept by preserve_di_access_index call.
For example, for "%7 = ...", the third argument is a constant-expression getelementptr instruction
with base

  @0 = private unnamed_addr constant [8 x i8] c"sk_buff\00", align 1

which indicates the type name sk_buff. The type name can be used to search in DebugInfo types
to find the precise definition.

Finally, the intrinsic

  naddr = preserve_di_access_index(addr, base, type_name, access_index)

can be simply transformed to

  naddr = addr

or deleted by replacing all uses of "naddr" with "addr".


Repository:
  rC Clang

https://reviews.llvm.org/D61809

Files:
  lib/Basic/Targets/BPF.h
  lib/CodeGen/CGExpr.cpp


Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -3352,6 +3352,15 @@
 
   llvm::Value *eltPtr = emitArraySubscriptGEP(
       CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name);
+
+  // Remember the original array subscript for bpf target
+  auto Arch = CGF.getTarget().getTriple().getArch();
+  if (Arch == llvm::Triple::bpfeb || Arch == llvm::Triple::bpfel)
+    return Address(CGF.Builder.CreatePreserveDIAccessIndex(
+                       eltPtr, addr.getPointer(), StringRef(),
+                       cast<llvm::ConstantInt>(indices.back())->getZExtValue()),
+                   eltAlign);
+
   return Address(eltPtr, eltAlign);
 }
 
@@ -3932,9 +3941,31 @@
       // a barrier every time CXXRecord field with vptr is referenced.
       addr = Address(Builder.CreateLaunderInvariantGroup(addr.getPointer()),
                      addr.getAlignment());
+
+    // Remember the original union field index for bpf target
+    auto Arch = CGM.getTarget().getTriple().getArch();
+    if (Arch == llvm::Triple::bpfeb || Arch == llvm::Triple::bpfel)
+      addr = Address(
+          Builder.CreatePreserveDIAccessIndex(addr.getPointer(),
+                                              addr.getPointer(),
+                                              rec->getName(),
+                                              field->getFieldIndex()),
+          addr.getAlignment());
   } else {
     // For structs, we GEP to the field that the record layout suggests.
-    addr = emitAddrOfFieldStorage(*this, addr, field);
+    Address naddr = emitAddrOfFieldStorage(*this, addr, field);
+
+    // Remember the original struct field index for bpf target
+    auto Arch = CGM.getTarget().getTriple().getArch();
+    if (Arch != llvm::Triple::bpfeb && Arch != llvm::Triple::bpfel)
+      addr = std::move(naddr);
+    else
+      addr = Address(
+          Builder.CreatePreserveDIAccessIndex(naddr.getPointer(),
+                                              addr.getPointer(),
+                                              rec->getName(),
+                                              field->getFieldIndex()),
+          naddr.getAlignment());
 
     // If this is a reference field, load the reference right now.
     if (FieldType->isReferenceType()) {
Index: lib/Basic/Targets/BPF.h
===================================================================
--- lib/Basic/Targets/BPF.h
+++ lib/Basic/Targets/BPF.h
@@ -46,7 +46,8 @@
                         MacroBuilder &Builder) const override;
 
   bool hasFeature(StringRef Feature) const override {
-    return Feature == "bpf" || Feature == "alu32" || Feature == "dwarfris";
+    return Feature == "bpf" || Feature == "alu32" || Feature == "dwarfris" ||
+           Feature == "offsetreloc";
   }
 
   void setFeatureEnabled(llvm::StringMap<bool> &Features, StringRef Name,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D61809.199098.patch
Type: text/x-patch
Size: 2942 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20190510/bce7017e/attachment.bin>


More information about the cfe-commits mailing list