[llvm] 481d67d - [Clang][BPF] Support record argument with direct values

Yonghong Song via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 18 19:12:09 PDT 2022


Author: Yonghong Song
Date: 2022-08-18T19:11:50-07:00
New Revision: 481d67d310a7a4213da72f838d6bafaa52ed01d3

URL: https://github.com/llvm/llvm-project/commit/481d67d310a7a4213da72f838d6bafaa52ed01d3
DIFF: https://github.com/llvm/llvm-project/commit/481d67d310a7a4213da72f838d6bafaa52ed01d3.diff

LOG: [Clang][BPF] Support record argument with direct values

Currently, record arguments are always passed by reference, with the
caller allocating space for the record values. This is less efficient for
small records, which may fit in one or two registers. For example,
on x86_64 and aarch64, a record of up to 16 bytes can be passed
by value directly in registers.

This patch adds BPF support for passing record arguments directly by
value for record sizes up to 16 bytes. If the record size is 0, the record
will not take any register, which is the same behavior as on x86_64
and aarch64. If the record size is greater than 16 bytes, the
record argument will be passed by reference.

Differential Revision: https://reviews.llvm.org/D132144

Added: 
    clang/test/CodeGen/bpf-struct-argument.c
    clang/test/CodeGen/bpf-union-argument.c
    llvm/test/CodeGen/BPF/struct-arg-inline.ll
    llvm/test/CodeGen/BPF/struct-arg.ll

Modified: 
    clang/lib/CodeGen/TargetInfo.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 2790c665c6dd2..39871b0907ede 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -11509,6 +11509,42 @@ class BPFABIInfo : public DefaultABIInfo {
 public:
   BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
 
+  ABIArgInfo classifyArgumentType(QualType Ty) const {
+    Ty = useFirstFieldIfTransparentUnion(Ty);
+
+    if (isAggregateTypeForABI(Ty)) {
+      uint64_t Bits = getContext().getTypeSize(Ty);
+      if (Bits == 0)
+        return ABIArgInfo::getIgnore();
+
+      // Aggregates up to 128 bits (1 or 2 registers) are passed directly.
+      if (Bits <= 128) {
+        llvm::Type *CoerceTy;
+        if (Bits <= 64) {
+          CoerceTy =
+              llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
+        } else {
+          llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), 64);
+          CoerceTy = llvm::ArrayType::get(RegTy, 2);
+        }
+        return ABIArgInfo::getDirect(CoerceTy);
+      } else {
+        return getNaturalAlignIndirect(Ty);
+      }
+    }
+
+    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+      Ty = EnumTy->getDecl()->getIntegerType();
+
+    ASTContext &Context = getContext();
+    if (const auto *EIT = Ty->getAs<BitIntType>())
+      if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty))
+        return getNaturalAlignIndirect(Ty);
+
+    return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+                                              : ABIArgInfo::getDirect());
+  }
+
   ABIArgInfo classifyReturnType(QualType RetTy) const {
     if (RetTy->isVoidType())
       return ABIArgInfo::getIgnore();

diff  --git a/clang/test/CodeGen/bpf-struct-argument.c b/clang/test/CodeGen/bpf-struct-argument.c
new file mode 100644
index 0000000000000..d4fcf16af2e28
--- /dev/null
+++ b/clang/test/CodeGen/bpf-struct-argument.c
@@ -0,0 +1,36 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 -triple bpf -O2 -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+
+struct t1 {};
+struct t2 {
+  int a;
+};
+struct t3 {
+  int a;
+  long b;
+};
+struct t4 {
+  long a;
+  long b;
+  long c;
+};
+
+int foo1(struct t1 arg1, struct t2 arg2) {
+// CHECK: define dso_local i32 @foo1(i32 %arg2.coerce)
+  return arg2.a;
+}
+
+int foo2(struct t3 arg1, struct t4 arg2) {
+// CHECK: define dso_local i32 @foo2([2 x i64] %arg1.coerce, ptr noundef byval(%struct.t4) align 8 %arg2)
+  return arg1.a + arg2.a;
+}
+
+int foo3(void) {
+  struct t1 tmp1 = {};
+  struct t2 tmp2 = {};
+  struct t3 tmp3 = {};
+  struct t4 tmp4 = {};
+  return foo1(tmp1, tmp2) + foo2(tmp3, tmp4);
+// CHECK: call i32 @foo1(i32 %{{[a-zA-Z0-9]+}})
+// CHECK: call i32 @foo2([2 x i64] %{{[a-zA-Z0-9]+}}, ptr noundef byval(%struct.t4) align 8 %tmp4)
+}

diff  --git a/clang/test/CodeGen/bpf-union-argument.c b/clang/test/CodeGen/bpf-union-argument.c
new file mode 100644
index 0000000000000..5f3a0bc5a9261
--- /dev/null
+++ b/clang/test/CodeGen/bpf-union-argument.c
@@ -0,0 +1,44 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 -triple bpf -O2 -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+
+union t1 {};
+union t2 {
+  int a;
+  long b;
+};
+union t3 {
+  struct {
+    int a;
+    long b;
+  };
+  long c;
+};
+union t4 {
+  struct {
+    long a;
+    long b;
+    long c;
+  };
+  long d;
+};
+
+int foo1(union t1 arg1, union t2 arg2) {
+// CHECK: define dso_local i32 @foo1(i64 %arg2.coerce)
+  return arg2.a;
+}
+
+int foo2(union t3 arg1, union t4 arg2) {
+// CHECK: define dso_local i32 @foo2([2 x i64] %arg1.coerce, ptr noundef byval(%union.t4) align 8 %arg2)
+  return arg1.a + arg2.a;
+
+}
+
+int foo3(void) {
+  union t1 tmp1 = {};
+  union t2 tmp2 = {};
+  union t3 tmp3 = {};
+  union t4 tmp4 = {};
+  return foo1(tmp1, tmp2) + foo2(tmp3, tmp4);
+// CHECK: call i32 @foo1(i64 %{{[a-zA-Z0-9]+}})
+// CHECK: call i32 @foo2([2 x i64] %{{[a-zA-Z0-9]+}}, ptr noundef byval(%union.t4) align 8 %tmp4)
+}

diff  --git a/llvm/test/CodeGen/BPF/struct-arg-inline.ll b/llvm/test/CodeGen/BPF/struct-arg-inline.ll
new file mode 100644
index 0000000000000..969533fafc0b0
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/struct-arg-inline.ll
@@ -0,0 +1,108 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux %s | llvm-dis > %t1
+; RUN: llc %t1 -o - | FileCheck %s
+; Source:
+;   struct t1 {
+;     long a;
+;   };
+;   struct t2 {
+;     long a;
+;     long b;
+;   };
+;   __attribute__((always_inline))
+;   static long foo1(struct t2 a1, struct t1 a2, struct t1 a3, struct t1 a4,
+;                    struct t1 a5, struct t2 a6) {
+;     return a1.a + a2.a + a3.a + a4.a + a5.a + a6.a;
+;   }
+;   long foo2(struct t2 a1, struct t2 a2, struct t1 a3) {
+;     return foo1(a1, a3, a3, a3, a3, a2);
+;   }
+; Compilation flags:
+;   clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c
+
+%struct.t2 = type { i64, i64 }
+%struct.t1 = type { i64 }
+
+; Function Attrs: nounwind
+define dso_local i64 @foo2([2 x i64] %a1.coerce, [2 x i64] %a2.coerce, i64 %a3.coerce) #0 {
+entry:
+  %a1 = alloca %struct.t2, align 8
+  %a2 = alloca %struct.t2, align 8
+  %a3 = alloca %struct.t1, align 8
+  store [2 x i64] %a1.coerce, ptr %a1, align 8
+  store [2 x i64] %a2.coerce, ptr %a2, align 8
+  %coerce.dive = getelementptr inbounds %struct.t1, ptr %a3, i32 0, i32 0
+  store i64 %a3.coerce, ptr %coerce.dive, align 8
+  %0 = load [2 x i64], ptr %a1, align 8
+  %coerce.dive1 = getelementptr inbounds %struct.t1, ptr %a3, i32 0, i32 0
+  %1 = load i64, ptr %coerce.dive1, align 8
+  %coerce.dive2 = getelementptr inbounds %struct.t1, ptr %a3, i32 0, i32 0
+  %2 = load i64, ptr %coerce.dive2, align 8
+  %coerce.dive3 = getelementptr inbounds %struct.t1, ptr %a3, i32 0, i32 0
+  %3 = load i64, ptr %coerce.dive3, align 8
+  %coerce.dive4 = getelementptr inbounds %struct.t1, ptr %a3, i32 0, i32 0
+  %4 = load i64, ptr %coerce.dive4, align 8
+  %5 = load [2 x i64], ptr %a2, align 8
+  %call = call i64 @foo1([2 x i64] %0, i64 %1, i64 %2, i64 %3, i64 %4, [2 x i64] %5)
+  ret i64 %call
+; CHECK:             r0 = r3
+; CHECK-NEXT:        r0 += r1
+; CHECK-NEXT:        r5 <<= 2
+; CHECK-NEXT:        r0 += r5
+; CHECK-NEXT:        exit
+}
+
+; Function Attrs: alwaysinline nounwind
+define internal i64 @foo1([2 x i64] %a1.coerce, i64 %a2.coerce, i64 %a3.coerce, i64 %a4.coerce, i64 %a5.coerce, [2 x i64] %a6.coerce) #1 {
+entry:
+  %a1 = alloca %struct.t2, align 8
+  %a2 = alloca %struct.t1, align 8
+  %a3 = alloca %struct.t1, align 8
+  %a4 = alloca %struct.t1, align 8
+  %a5 = alloca %struct.t1, align 8
+  %a6 = alloca %struct.t2, align 8
+  store [2 x i64] %a1.coerce, ptr %a1, align 8
+  %coerce.dive = getelementptr inbounds %struct.t1, ptr %a2, i32 0, i32 0
+  store i64 %a2.coerce, ptr %coerce.dive, align 8
+  %coerce.dive1 = getelementptr inbounds %struct.t1, ptr %a3, i32 0, i32 0
+  store i64 %a3.coerce, ptr %coerce.dive1, align 8
+  %coerce.dive2 = getelementptr inbounds %struct.t1, ptr %a4, i32 0, i32 0
+  store i64 %a4.coerce, ptr %coerce.dive2, align 8
+  %coerce.dive3 = getelementptr inbounds %struct.t1, ptr %a5, i32 0, i32 0
+  store i64 %a5.coerce, ptr %coerce.dive3, align 8
+  store [2 x i64] %a6.coerce, ptr %a6, align 8
+  %a = getelementptr inbounds %struct.t2, ptr %a1, i32 0, i32 0
+  %0 = load i64, ptr %a, align 8, !tbaa !3
+  %a7 = getelementptr inbounds %struct.t1, ptr %a2, i32 0, i32 0
+  %1 = load i64, ptr %a7, align 8, !tbaa !8
+  %add = add nsw i64 %0, %1
+  %a8 = getelementptr inbounds %struct.t1, ptr %a3, i32 0, i32 0
+  %2 = load i64, ptr %a8, align 8, !tbaa !8
+  %add9 = add nsw i64 %add, %2
+  %a10 = getelementptr inbounds %struct.t1, ptr %a4, i32 0, i32 0
+  %3 = load i64, ptr %a10, align 8, !tbaa !8
+  %add11 = add nsw i64 %add9, %3
+  %a12 = getelementptr inbounds %struct.t1, ptr %a5, i32 0, i32 0
+  %4 = load i64, ptr %a12, align 8, !tbaa !8
+  %add13 = add nsw i64 %add11, %4
+  %a14 = getelementptr inbounds %struct.t2, ptr %a6, i32 0, i32 0
+  %5 = load i64, ptr %a14, align 8, !tbaa !3
+  %add15 = add nsw i64 %add13, %5
+  ret i64 %add15
+}
+
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { alwaysinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 9385660f4ca87d074410a84df89faca313afcb5a)"}
+!3 = !{!4, !5, i64 0}
+!4 = !{!"t2", !5, i64 0, !5, i64 8}
+!5 = !{!"long", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!9, !5, i64 0}
+!9 = !{!"t1", !5, i64 0}

diff  --git a/llvm/test/CodeGen/BPF/struct-arg.ll b/llvm/test/CodeGen/BPF/struct-arg.ll
new file mode 100644
index 0000000000000..cfe49df045363
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/struct-arg.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=bpfel | FileCheck %s
+; Source:
+;   struct t1 {
+;     long a;
+;   };
+;   struct t2 {
+;     long a;
+;     long b;
+;   };
+;   long foo2(struct t1 a1, struct t1 a2, struct t1 a3, struct t1 a4, struct t1 a5) {
+;     return a1.a + a2.a + a3.a + a4.a + a5.a;
+;   }
+;   long foo3(struct t2 a1, struct t2 a2, struct t1 a3) {
+;     return a1.a + a2.a + a3.a;
+;   }
+; Compilation flags:
+;   clang -target bpf -S -emit-llvm -O2 t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
+define dso_local i64 @foo2(i64 %a1.coerce, i64 %a2.coerce, i64 %a3.coerce, i64 %a4.coerce, i64 %a5.coerce) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i64 %a2.coerce, %a1.coerce
+  %add8 = add nsw i64 %add, %a3.coerce
+  %add10 = add nsw i64 %add8, %a4.coerce
+  %add12 = add nsw i64 %add10, %a5.coerce
+  ret i64 %add12
+; CHECK:        r0 = r2
+; CHECK:        r0 += r1
+; CHECK:        r0 += r3
+; CHECK:        r0 += r4
+; CHECK:        r0 += r5
+; CHECK-NEXT:   exit
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
+define dso_local i64 @foo3([2 x i64] %a1.coerce, [2 x i64] %a2.coerce, i64 %a3.coerce) local_unnamed_addr #0 {
+entry:
+  %a1.coerce.fca.0.extract = extractvalue [2 x i64] %a1.coerce, 0
+  %a2.coerce.fca.0.extract = extractvalue [2 x i64] %a2.coerce, 0
+  %add = add nsw i64 %a2.coerce.fca.0.extract, %a1.coerce.fca.0.extract
+  %add6 = add nsw i64 %add, %a3.coerce
+  ret i64 %add6
+; CHECK:        r0 = r3
+; CHECK-NEXT:   r0 += r1
+; CHECK-NEXT:   r0 += r5
+; CHECK-NEXT:   exit
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 9385660f4ca87d074410a84df89faca313afcb5a)"}


        


More information about the llvm-commits mailing list