[llvm] ea72b03 - BPF: make 32bit register spill with 64bit alignment

Yonghong Song via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 20 21:00:45 PDT 2021


Author: Yonghong Song
Date: 2021-09-20T21:00:25-07:00
New Revision: ea72b0319d7b0f0c2fcf41d121afa5d031b319d5

URL: https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5
DIFF: https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5.diff

LOG: BPF: make 32bit register spill with 64bit alignment

In llvm, for non-alu32 mode, the stack alignment is 64bit, so there is
only one 64bit spill per 64bit slot. For alu32 mode, the stack alignment
is 32bit, so it is possible to have two 32bit spills per
64bit slot.

Currently, the bpf kernel verifier does not preserve register states
for 32bit spills. That is, a 32bit register may hold a constant
value or a bounded range before the spill. After reload from the
stack, the information is lost, and sometimes this may cause
verifier failure. For 64bit register spills, the verifier
does try to preserve the register state for reloading.

The current verifier can be modestly changed to handle one
32bit spill per 64bit stack slot with state-preserving reload.
Handling two 32bit spills per 64bit stack slot will require
substantial changes.

This patch changes stack alignment for alu32 to be 64bit.
This way, for any 64bit slot in alu32 mode, only one
32bit or 64bit register value can be saved. Together
with the previously mentioned verifier enhancement, 32bit
spills can be handled with state preservation.

Note that llvm stack slot coalescing
seems to do only adjacent packing, which may leave some holes
in the stack. For example,
   stack slot 8   <== 8 bytes
   stack slot 4   <== 8 bytes with 4 byte hole
   stack slot 8   <== 8 bytes
   stack slot 4   <== 4 bytes

Differential Revision: https://reviews.llvm.org/D109073

Added: 
    llvm/test/CodeGen/BPF/spill-alu32.ll

Modified: 
    llvm/lib/Target/BPF/BPFRegisterInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/BPF/BPFRegisterInfo.td b/llvm/lib/Target/BPF/BPFRegisterInfo.td
index 88dec063be702..abeef5dc8aad2 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.td
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.td
@@ -36,7 +36,7 @@ foreach I = 0-11 in {
 }
 
 // Register classes.
-def GPR32 : RegisterClass<"BPF", [i32], 32, (add
+def GPR32 : RegisterClass<"BPF", [i32], 64, (add
   (sequence "W%u", 1, 9),
   W0, // Return value
   W11, // Stack Ptr

diff  --git a/llvm/test/CodeGen/BPF/spill-alu32.ll b/llvm/test/CodeGen/BPF/spill-alu32.ll
new file mode 100644
index 0000000000000..c9de589cad3eb
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/spill-alu32.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=bpf -mcpu=v3 < %s | FileCheck %s
+;
+; Source code:
+;   void foo(int, int, int, long, int);
+;   int test(int a, int b, int c, long d, int e) {
+;     foo(a, b, c, d, e);
+;     __asm__ __volatile__ ("":::"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory");
+;     foo(a, b, c, d, e);
+;     return 0;
+;   }
+; Compilation flag:
+;   clang -target bpf -S -emit-llvm -O2 -mcpu=v3 t.c
+
+; Function Attrs: nounwind
+define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i64 %d, i32 %e) local_unnamed_addr #0 {
+entry:
+  tail call void @foo(i32 %a, i32 %b, i32 %c, i64 %d, i32 %e) #2
+  tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{memory}"() #2
+
+; CHECK:        *(u32 *)(r10 - 8) = w5
+; CHECK:        *(u64 *)(r10 - 16) = r4
+; CHECK:        *(u32 *)(r10 - 24) = w3
+; CHECK:        *(u32 *)(r10 - 32) = w2
+; CHECK:        *(u32 *)(r10 - 40) = w1
+; CHECK:        call foo
+
+  tail call void @foo(i32 %a, i32 %b, i32 %c, i64 %d, i32 %e) #2
+  ret i32 0
+}
+
+declare dso_local void @foo(i32, i32, i32, i64, i32) local_unnamed_addr #1
+
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" }
+attributes #2 = { nounwind }


        


More information about the llvm-commits mailing list