r365438 - [BPF] Preserve debuginfo array/union/struct type/access index

Yonghong Song via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 8 21:21:51 PDT 2019


Author: yhs
Date: Mon Jul  8 21:21:50 2019
New Revision: 365438

URL: http://llvm.org/viewvc/llvm-project?rev=365438&view=rev
Log:
[BPF] Preserve debuginfo array/union/struct type/access index

For background of BPF CO-RE project, please refer to
  http://vger.kernel.org/bpfconf2019.html
In summary, BPF CO-RE intends to compile bpf programs
adjustable on struct/union layout change so the same
program can run on multiple kernels with adjustment
before loading based on native kernel structures.

In order to do this, we need to keep track of GEP (getelementptr)
instruction base and result debuginfo types, so we
can adjust on the host based on kernel BTF info.
Capturing such information as an IR optimization is hard
because various optimizations may have tweaked the GEP, and because
a union is replaced by a structure in IR, which makes it impossible
to track the field index for union member accesses.

Three intrinsic functions, preserve_{array,union,struct}_access_index,
are introduced.
  addr = preserve_array_access_index(base, dimension, index)
  addr = preserve_union_access_index(base, di_index)
  addr = preserve_struct_access_index(base, gep_index, di_index)
here,
  base: the base pointer for the array/union/struct access.
  index: the last access index for array, the same for IR/DebugInfo layout.
  dimension: the array dimension.
  gep_index: the access index based on IR layout.
  di_index: the access index based on user/debuginfo types.

If using these intrinsics blindly, i.e., transforming all GEPs
to these intrinsics and later on reducing them to GEPs, we have
seen up to 7% more instructions generated. To avoid such an overhead,
a clang builtin is proposed:
  base = __builtin_preserve_access_index(base)
such that the user wraps to-be-relocated GEPs in this builtin
and preserve_*_access_index intrinsics only apply to
those GEPs. Such an opt-in approach prevents performance degradation
if people do not use CO-RE, even for programs which use
bpf_probe_read().

For example, consider the following program:
  $ cat test.c
  struct sk_buff {
     int i;
     int b1:1;
     int b2:2;
     union {
       struct {
         int o1;
         int o2;
       } o;
       struct {
         char flags;
         char dev_id;
       } dev;
       int netid;
     } u[10];
  };

  static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr)
      = (void *) 4;

  #define _(x) (__builtin_preserve_access_index(x))

  int bpf_prog(struct sk_buff *ctx) {
    char dev_id;
    bpf_probe_read(&dev_id, sizeof(char), _(&ctx->u[5].dev.dev_id));
    return dev_id;
  }
  $ clang -target bpf -O2 -g -emit-llvm -S -mllvm -print-before-all \
    test.c >& log

The generated IR looks like below:
  ...
  define dso_local i32 @bpf_prog(%struct.sk_buff*) #0 !dbg !15 {
    %2 = alloca %struct.sk_buff*, align 8
    %3 = alloca i8, align 1
    store %struct.sk_buff* %0, %struct.sk_buff** %2, align 8, !tbaa !45
    call void @llvm.dbg.declare(metadata %struct.sk_buff** %2, metadata !43, metadata !DIExpression()), !dbg !49
    call void @llvm.lifetime.start.p0i8(i64 1, i8* %3) #4, !dbg !50
    call void @llvm.dbg.declare(metadata i8* %3, metadata !44, metadata !DIExpression()), !dbg !51
    %4 = load i32 (i8*, i32, i8*)*, i32 (i8*, i32, i8*)** @bpf_probe_read, align 8, !dbg !52, !tbaa !45
    %5 = load %struct.sk_buff*, %struct.sk_buff** %2, align 8, !dbg !53, !tbaa !45
    %6 = call [10 x %union.anon]* @llvm.preserve.struct.access.index.p0a10s_union.anons.p0s_struct.sk_buffs(
         %struct.sk_buff* %5, i32 2, i32 3), !dbg !53, !llvm.preserve.access.index !19
    %7 = call %union.anon* @llvm.preserve.array.access.index.p0s_union.anons.p0a10s_union.anons(
         [10 x %union.anon]* %6, i32 1, i32 5), !dbg !53
    %8 = call %union.anon* @llvm.preserve.union.access.index.p0s_union.anons.p0s_union.anons(
         %union.anon* %7, i32 1), !dbg !53, !llvm.preserve.access.index !26
    %9 = bitcast %union.anon* %8 to %struct.anon.0*, !dbg !53
    %10 = call i8* @llvm.preserve.struct.access.index.p0i8.p0s_struct.anon.0s(
         %struct.anon.0* %9, i32 1, i32 1), !dbg !53, !llvm.preserve.access.index !34
    %11 = call i32 %4(i8* %3, i32 1, i8* %10), !dbg !52
    %12 = load i8, i8* %3, align 1, !dbg !54, !tbaa !55
    %13 = sext i8 %12 to i32, !dbg !54
    call void @llvm.lifetime.end.p0i8(i64 1, i8* %3) #4, !dbg !56
    ret i32 %13, !dbg !57
  }

  !19 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "sk_buff", file: !3, line: 1, size: 704, elements: !20)
  !26 = distinct !DICompositeType(tag: DW_TAG_union_type, scope: !19, file: !3, line: 5, size: 64, elements: !27)
  !34 = distinct !DICompositeType(tag: DW_TAG_structure_type, scope: !26, file: !3, line: 10, size: 16, elements: !35)

Note that @llvm.preserve.{struct,union}.access.index calls have metadata llvm.preserve.access.index
attached to instructions to provide struct/union debuginfo type information.

For &ctx->u[5].dev.dev_id,
  . The "%6 = ..." represents struct member "u" with index 2 for IR layout and index 3 for DI layout.
  . The "%7 = ..." represents array subscript "5".
  . The "%8 = ..." represents union member "dev" with index 1 for DI layout.
  . The "%10 = ..." represents struct member "dev_id" with index 1 for both IR and DI layout.

Basically, by traversing the use-def chain recursively for the 3rd argument of bpf_probe_read() and
examining all preserve_*_access_index calls, the debuginfo struct/union/array access indices
can be recovered.

The intrinsics also contain enough information to regenerate code for the IR layout.
For array and structure intrinsics, the proper GEP can be constructed.
For union intrinsics, replacing all uses of "addr" with "base" should be enough.

Signed-off-by: Yonghong Song <yhs at fb.com>

Differential Revision: https://reviews.llvm.org/D61809

Added:
    cfe/trunk/test/CodeGen/bpf-preserve-access-index-2.c
    cfe/trunk/test/CodeGen/bpf-preserve-access-index.c
Modified:
    cfe/trunk/docs/LanguageExtensions.rst
    cfe/trunk/include/clang/Basic/Builtins.def
    cfe/trunk/lib/CodeGen/CGBuilder.h
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/lib/CodeGen/CGExpr.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h
    cfe/trunk/lib/Sema/SemaChecking.cpp

Modified: cfe/trunk/docs/LanguageExtensions.rst
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/docs/LanguageExtensions.rst?rev=365438&r1=365437&r2=365438&view=diff
==============================================================================
--- cfe/trunk/docs/LanguageExtensions.rst (original)
+++ cfe/trunk/docs/LanguageExtensions.rst Mon Jul  8 21:21:50 2019
@@ -1950,6 +1950,35 @@ form of ``__builtin_operator_delete`` is
 These builtins are intended for use in the implementation of ``std::allocator``
 and other similar allocation libraries, and are only available in C++.
 
+``__builtin_preserve_access_index``
+-----------------------------------
+
+``__builtin_preserve_access_index`` specifies a code section where
+array subscript access and structure/union member access are relocatable
+under bpf compile-once run-everywhere framework. Debuginfo (typically
+with ``-g``) is needed, otherwise, the compiler will exit with an error.
+
+**Syntax**:
+
+.. code-block:: c
+
+  const void * __builtin_preserve_access_index(const void * ptr)
+
+**Example of Use**:
+
+.. code-block:: c
+
+  struct t {
+    int i;
+    int j;
+    union {
+      int a;
+      int b;
+    } c[4];
+  };
+  struct t *v = ...;
+  const void *pb =__builtin_preserve_access_index(&v->c[3].b);
+
 Multiprecision Arithmetic Builtins
 ----------------------------------
 

Modified: cfe/trunk/include/clang/Basic/Builtins.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=365438&r1=365437&r2=365438&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/Builtins.def (original)
+++ cfe/trunk/include/clang/Basic/Builtins.def Mon Jul  8 21:21:50 2019
@@ -1449,6 +1449,7 @@ BUILTIN(__builtin_operator_new, "v*z", "
 BUILTIN(__builtin_operator_delete, "vv*", "tn")
 BUILTIN(__builtin_char_memchr, "c*cC*iz", "n")
 BUILTIN(__builtin_dump_struct, "ivC*v*", "tn")
+BUILTIN(__builtin_preserve_access_index, "vC*vC*", "nU")
 
 // Safestack builtins
 BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn")

Modified: cfe/trunk/lib/CodeGen/CGBuilder.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuilder.h?rev=365438&r1=365437&r2=365438&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuilder.h (original)
+++ cfe/trunk/lib/CodeGen/CGBuilder.h Mon Jul  8 21:21:50 2019
@@ -298,6 +298,21 @@ public:
     return CreateMemSet(Dest.getPointer(), Value, Size,
                         Dest.getAlignment().getQuantity(), IsVolatile);
   }
+
+  using CGBuilderBaseTy::CreatePreserveStructAccessIndex;
+  Address CreatePreserveStructAccessIndex(Address Addr,
+                                          unsigned Index,
+                                          unsigned FieldIndex,
+                                          llvm::MDNode *DbgInfo) {
+    llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType());
+    const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
+    const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
+    auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
+
+    return Address(CreatePreserveStructAccessIndex(Addr.getPointer(),
+                                                   Index, FieldIndex, DbgInfo),
+                   Addr.getAlignment().alignmentAtOffset(Offset));
+  }
 };
 
 }  // end namespace CodeGen

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=365438&r1=365437&r2=365438&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jul  8 21:21:50 2019
@@ -1840,6 +1840,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(
     return RValue::get(Res);
   }
 
+  case Builtin::BI__builtin_preserve_access_index: {
+    // Only enabled preserved access index region when debuginfo
+    // is available as debuginfo is needed to preserve user-level
+    // access pattern.
+    if (!getDebugInfo()) {
+      CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
+      return RValue::get(EmitScalarExpr(E->getArg(0)));
+    }
+
+    // Nested builtin_preserve_access_index() not supported
+    if (IsInPreservedAIRegion) {
+      CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
+      return RValue::get(EmitScalarExpr(E->getArg(0)));
+    }
+
+    IsInPreservedAIRegion = true;
+    Value *Res = EmitScalarExpr(E->getArg(0));
+    IsInPreservedAIRegion = false;
+    return RValue::get(Res);
+  }
+
   case Builtin::BI__builtin_cimag:
   case Builtin::BI__builtin_cimagf:
   case Builtin::BI__builtin_cimagl:

Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=365438&r1=365437&r2=365438&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Mon Jul  8 21:21:50 2019
@@ -25,6 +25,7 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/NSAPI.h"
+#include "clang/Basic/Builtins.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
@@ -3418,8 +3419,20 @@ static Address emitArraySubscriptGEP(Cod
   CharUnits eltAlign =
     getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize);
 
-  llvm::Value *eltPtr = emitArraySubscriptGEP(
-      CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name);
+  llvm::Value *eltPtr;
+  auto LastIndex = dyn_cast<llvm::ConstantInt>(indices.back());
+  if (!CGF.IsInPreservedAIRegion || !LastIndex) {
+    eltPtr = emitArraySubscriptGEP(
+        CGF, addr.getPointer(), indices, inbounds, signedIndices,
+        loc, name);
+  } else {
+    // Remember the original array subscript for bpf target
+    unsigned idx = LastIndex->getZExtValue();
+    eltPtr = CGF.Builder.CreatePreserveArrayAccessIndex(addr.getPointer(),
+                                                        indices.size() - 1,
+                                                        idx);
+  }
+
   return Address(eltPtr, eltAlign);
 }
 
@@ -3908,6 +3921,19 @@ static Address emitAddrOfFieldStorage(Co
   return CGF.Builder.CreateStructGEP(base, idx, field->getName());
 }
 
+static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base,
+                                        const FieldDecl *field) {
+  const RecordDecl *rec = field->getParent();
+  llvm::DIType *DbgInfo = CGF.getDebugInfo()->getOrCreateRecordType(
+      CGF.getContext().getRecordType(rec), rec->getLocation());
+
+  unsigned idx =
+      CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
+
+  return CGF.Builder.CreatePreserveStructAccessIndex(
+      base, idx, field->getFieldIndex(), DbgInfo);
+}
+
 static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
   const auto *RD = Type.getTypePtr()->getAsCXXRecordDecl();
   if (!RD)
@@ -4015,9 +4041,24 @@ LValue CodeGenFunction::EmitLValueForFie
       // a barrier every time CXXRecord field with vptr is referenced.
       addr = Address(Builder.CreateLaunderInvariantGroup(addr.getPointer()),
                      addr.getAlignment());
+
+    if (IsInPreservedAIRegion) {
+      // Remember the original union field index
+      llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType(
+          getContext().getRecordType(rec), rec->getLocation());
+      addr = Address(
+          Builder.CreatePreserveUnionAccessIndex(
+              addr.getPointer(), field->getFieldIndex(), DbgInfo),
+          addr.getAlignment());
+    }
   } else {
-    // For structs, we GEP to the field that the record layout suggests.
-    addr = emitAddrOfFieldStorage(*this, addr, field);
+
+    if (!IsInPreservedAIRegion)
+      // For structs, we GEP to the field that the record layout suggests.
+      addr = emitAddrOfFieldStorage(*this, addr, field);
+    else
+      // Remember the original struct field index
+      addr = emitPreserveStructAccess(*this, addr, field);
 
     // If this is a reference field, load the reference right now.
     if (FieldType->isReferenceType()) {

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=365438&r1=365437&r2=365438&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Mon Jul  8 21:21:50 2019
@@ -480,6 +480,10 @@ public:
   /// finally block or filter expression.
   bool IsOutlinedSEHHelper = false;
 
+  /// True if CodeGen currently emits code inside presereved access index
+  /// region.
+  bool IsInPreservedAIRegion = false;
+
   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
   llvm::Value *BlockPointer = nullptr;
 

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=365438&r1=365437&r2=365438&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Mon Jul  8 21:21:50 2019
@@ -191,6 +191,16 @@ static bool SemaBuiltinAddressof(Sema &S
   return false;
 }
 
+/// Check the number of arguments, and set the result type to
+/// the argument type.
+static bool SemaBuiltinPreserveAI(Sema &S, CallExpr *TheCall) {
+  if (checkArgCount(S, TheCall, 1))
+    return true;
+
+  TheCall->setType(TheCall->getArg(0)->getType());
+  return false;
+}
+
 static bool SemaBuiltinOverflow(Sema &S, CallExpr *TheCall) {
   if (checkArgCount(S, TheCall, 3))
     return true;
@@ -1409,6 +1419,10 @@ Sema::CheckBuiltinFunctionCall(FunctionD
     TheCall->setType(Context.IntTy);
     break;
   }
+  case Builtin::BI__builtin_preserve_access_index:
+    if (SemaBuiltinPreserveAI(*this, TheCall))
+      return ExprError();
+    break;
   case Builtin::BI__builtin_call_with_static_chain:
     if (SemaBuiltinCallWithStaticChain(*this, TheCall))
       return ExprError();

Added: cfe/trunk/test/CodeGen/bpf-preserve-access-index-2.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/bpf-preserve-access-index-2.c?rev=365438&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/bpf-preserve-access-index-2.c (added)
+++ cfe/trunk/test/CodeGen/bpf-preserve-access-index-2.c Mon Jul  8 21:21:50 2019
@@ -0,0 +1,22 @@
+// RUN: %clang %s -target bpfeb -x c -emit-llvm -S -g -O2 -o - | FileCheck %s
+// RUN: %clang %s -target bpfel -x c -emit-llvm -S -g -O2 -o - | FileCheck %s
+
+struct t {
+  int i:1;
+  int j:2;
+  union {
+   int a;
+   int b;
+  } c[4];
+};
+
+#define _(x) (x)
+
+const void *test(struct t *arg) {
+  return _(&arg->c[3].b);
+}
+
+// CHECK-NOT: llvm.preserve.struct.access.index
+// CHECK-NOT: llvm.preserve.array.access.index
+// CHECK-NOT: llvm.preserve.union.access.index
+// CHECK-NOT: __builtin_preserve_access_index

Added: cfe/trunk/test/CodeGen/bpf-preserve-access-index.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/bpf-preserve-access-index.c?rev=365438&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/bpf-preserve-access-index.c (added)
+++ cfe/trunk/test/CodeGen/bpf-preserve-access-index.c Mon Jul  8 21:21:50 2019
@@ -0,0 +1,22 @@
+// RUN: %clang %s -target bpfeb -x c -emit-llvm -S -g -O2 -o - | FileCheck --check-prefix=CHECK %s
+// RUN: %clang %s -target bpfel -x c -emit-llvm -S -g -O2 -o - | FileCheck --check-prefix=CHECK %s
+
+struct t {
+  int i:1;
+  int j:2;
+  union {
+   int a;
+   int b;
+  } c[4];
+};
+
+#define _(x) (__builtin_preserve_access_index(x))
+
+const void *test(struct t *arg) {
+  return _(&arg->c[3].b);
+}
+
+// CHECK: llvm.preserve.struct.access.index
+// CHECK: llvm.preserve.array.access.index
+// CHECK: llvm.preserve.union.access.index
+// CHECK-NOT: __builtin_preserve_access_index




More information about the cfe-commits mailing list