[clang] [llvm] [Clang][BPF] Allow sign extension for int type call parameters (PR #84874)

Tue Mar 12 11:32:58 PDT 2024

https://github.com/yonghong-song updated https://github.com/llvm/llvm-project/pull/84874

>From e2a62512b1693d06dc3421d93de17fc248a6a496 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Mon, 11 Mar 2024 22:27:37 -0700
Subject: [PATCH] [Clang][BPF] Allow sign/zero extension for call parameters
 with int/uint types

Pu Lehui (pulehui at huaweicloud.com) reported an issue in private
that at no_alu32 mode clang may generate code which produced
incorrect result with riscv architecture.

The affected bpf prog is kfunc_call_test4 at bpf selftests
prog/kfunc_call_test.c. The following is the source code:

  long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
  int kfunc_call_test4(struct __sk_buff *skb)
  {
        ...
        tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000);
        ...
  }

For the above code, at no_alu32 mode (-mcpu=v2), the asm code looks like
  0: r1 = -3
  1: r2 = -30
  2: r3 = 0xffffff38 ll // opcode: 18 03 00 00 38 ff ff ff 00 00 00 00 00 00 00 00
  4: r4 = -1000
  5: call bpf_kfunc_call_test4

In bpf_kfunc_call_test4(), arguments with 'char', 'short' and 'long' are
generated correctly, for r1, r2 and r4 in the above. But the argument
r3 is generated with ld_imm64 with zero extension.

Further investigation found that
  - char/short type arguments are signed extended so naturally using MOV insn
  - int type argument are zero extended so using ld_imm64 insn
  - long type argument can do sign extension with 32-bit value so using MOV insn

In riscv case, the 'r3' value (0xffffff38 ll) will be passed to riscv kernel
code which does not do 32-bit sign extension and caused incorrect result.

Why intel/arm64 does not have this issue? x86_64/arm64 supports subrgisters
so for 'int' types, subregisters are directly used hence there is no issue.

Considering BPF is a 64-bit arch, so I think it makes sense at IR level
int/uint type arguments should have the same sign-extension as char/uchar and
short/ushort types. This will solve the above riscv issue.

This patch will cause two codegen changes:
  - for an 'int' constant argument, a MOV insn will be used instead of a ld_imm64.
  - for an 'int' register argument, for cpu=v1/v2, left/right shift will happen.
    for cpu=v3/v4, there is no change from previous behavior as subregisters will
    be used.

Tested with bpf selftests with all of no-alu32, cpu=v3 and cpu=v4, and
all passed.

Signed-off-by: Yonghong Song <yonghong.song at linux.dev>
---
 clang/lib/CodeGen/Targets/BPF.cpp    | 13 ++++++
 clang/test/CodeGen/bpf-abiinfo.c     | 20 +++++++++
 llvm/test/CodeGen/BPF/cc_args_int.ll | 64 ++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+)
 create mode 100644 llvm/test/CodeGen/BPF/cc_args_int.ll

diff --git a/clang/lib/CodeGen/Targets/BPF.cpp b/clang/lib/CodeGen/Targets/BPF.cpp
index 2849222f7a1869..2646789ccb76e7 100644
--- a/clang/lib/CodeGen/Targets/BPF.cpp
+++ b/clang/lib/CodeGen/Targets/BPF.cpp
@@ -22,6 +22,19 @@ class BPFABIInfo : public DefaultABIInfo {
 public:
   BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
 
+  bool isPromotableIntegerTypeForABI(QualType Ty) const {
+    if (ABIInfo::isPromotableIntegerTypeForABI(Ty) == true)
+      return true;
+
+    if (const auto *BT = Ty->getAs<BuiltinType>()) {
+      // For Int/UInt type, return true to allow proper sign/zero extension.
+      if (BT->getKind() == BuiltinType::Int || BT->getKind() == BuiltinType::UInt)
+        return true;
+    }
+
+    return false;
+  }
+
   ABIArgInfo classifyArgumentType(QualType Ty) const {
     Ty = useFirstFieldIfTransparentUnion(Ty);
 
diff --git a/clang/test/CodeGen/bpf-abiinfo.c b/clang/test/CodeGen/bpf-abiinfo.c
index 366e8003f45572..ca8c7ceff17725 100644
--- a/clang/test/CodeGen/bpf-abiinfo.c
+++ b/clang/test/CodeGen/bpf-abiinfo.c
@@ -22,3 +22,23 @@ int foo_int(void) {
         if (bar_int() != 10) return 0; else return 1;
 }
 // CHECK: %call = call i32 @bar_int()
+
+void sprog1(short, int, int);
+void mprog1() {
+  sprog1(-3, 4, -5);
+// CHECK: call void @sprog1(i16 noundef signext -3, i32 noundef signext 4, i32 noundef signext -5)
+}
+void mprog2(long a, long b) {
+  sprog1(a, b, b);
+// CHECK: call void @sprog1(i16 noundef signext %{{[0-9a-z]+}}, i32 noundef signext %{{[0-9a-z]+}}, i32 noundef signext %{{[0-9a-z]+}})
+}
+
+void sprog2(unsigned short, unsigned int, unsigned int);
+void mprog3() {
+  sprog2(-3, 4, -5);
+// CHECK: call void @sprog2(i16 noundef zeroext -3, i32 noundef zeroext 4, i32 noundef zeroext -5)
+}
+void mprog4(unsigned long a, unsigned long b) {
+  sprog2(a, b, b);
+// CHECK: call void @sprog2(i16 noundef zeroext %{{[0-9a-z]+}}, i32 noundef zeroext %{{[0-9a-z]+}}, i32 noundef zeroext %{{[0-9a-z]+}})
+}
diff --git a/llvm/test/CodeGen/BPF/cc_args_int.ll b/llvm/test/CodeGen/BPF/cc_args_int.ll
new file mode 100644
index 00000000000000..a9447e2184812b
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/cc_args_int.ll
@@ -0,0 +1,64 @@
+; RUN: llc -march=bpfel -mcpu=v1 < %s | FileCheck --check-prefix=CHECK-V1 %s
+; RUN: llc -march=bpfel -mcpu=v2 < %s | FileCheck --check-prefix=CHECK-V2 %s
+; RUN: llc -march=bpfel -mcpu=v3 < %s | FileCheck --check-prefix=CHECK-V3 %s
+; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck --check-prefix=CHECK-V4 %s
+
+declare dso_local void @bar1(i16 noundef signext, i32 noundef signext, i32 noundef signext) local_unnamed_addr
+
+define void @test1() {
+entry:
+  tail call void @bar1(i16 noundef signext -3, i32 noundef signext 4, i32 noundef signext -5)
+; CHECK-V1:     r2 = 4
+; CHECK-V1:     r3 = -5
+; CHECK-V2:     r2 = 4
+; CHECK-V2:     r3 = -5
+; CHECK-V3:     w2 = 4
+; CHECK-V3:     w3 = -5
+; CHECK-V4:     w2 = 4
+; CHECK-V4:     w3 = -5
+  ret void
+}
+
+define dso_local void @test2(i64 noundef %a, i64 noundef %b) local_unnamed_addr {
+entry:
+  %conv = trunc i64 %a to i16
+  %conv1 = trunc i64 %b to i32
+  tail call void @bar1(i16 noundef signext %conv, i32 noundef signext %conv1, i32 noundef signext %conv1)
+; CHECK-V1:     r2 <<= 32
+; CHECK-V1:     r2 s>>= 32
+; CHECK-V2:     r2 <<= 32
+; CHECK-V2:     r2 s>>= 32
+; CHECK-V3-NOT: r2 <<= 32
+; CHECK-V4-NOT: r2 <<= 32
+  ret void
+}
+
+declare dso_local void @bar2(i16 noundef zeroext, i32 noundef zeroext, i32 noundef zeroext) local_unnamed_addr
+
+define void @test3() {
+entry:
+  tail call void @bar2(i16 noundef zeroext -3, i32 noundef zeroext 4, i32 noundef zeroext -5)
+; CHECK-V1:     r2 = 4
+; CHECK-V1:     r3 = 4294967291 ll
+; CHECK-V2:     r2 = 4
+; CHECK-V2:     r3 = 4294967291 ll
+; CHECK-V3:     w2 = 4
+; CHECK-V3:     w3 = -5
+; CHECK-V4:     w2 = 4
+; CHECK-V4:     w3 = -5
+  ret void
+}
+
+define dso_local void @test4(i64 noundef %a, i64 noundef %b) local_unnamed_addr {
+entry:
+  %conv = trunc i64 %a to i16
+  %conv1 = trunc i64 %b to i32
+  tail call void @bar2(i16 noundef zeroext %conv, i32 noundef zeroext %conv1, i32 noundef zeroext %conv1)
+; CHECK-V1:     r2 <<= 32
+; CHECK-V1:     r2 >>= 32
+; CHECK-V2:     r2 <<= 32
+; CHECK-V2:     r2 >>= 32
+; CHECK-V3-NOT: r2 <<= 32
+; CHECK-V4-NOT: r2 <<= 32
+  ret void
+}