[llvm] 009f3a8 - BPF: remove intrindics @llvm.stacksave() and @llvm.stackrestore()

Yonghong Song via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 18 09:51:37 PDT 2021


Author: Yonghong Song
Date: 2021-10-18T09:51:19-07:00
New Revision: 009f3a89d833ad5446f9b12326b7a4f54c87c5f5

URL: https://github.com/llvm/llvm-project/commit/009f3a89d833ad5446f9b12326b7a4f54c87c5f5
DIFF: https://github.com/llvm/llvm-project/commit/009f3a89d833ad5446f9b12326b7a4f54c87c5f5.diff

LOG: BPF: remove intrindics @llvm.stacksave() and @llvm.stackrestore()

Paul Chaignon reported a bpf verifier failure ([1]) due to using
non-ABI register R11. For the test case, llvm11 is okay while
llvm12 and later generates verifier unfriendly code.

The failure is related to variable length array size.
The following mimics the variable length array definition
in the test case:

struct t { char a[20]; };
void foo(void *);
int test() {
   const int a = 8;
   char tmp[AA + sizeof(struct t) + a];
   foo(tmp);
   ...
}

Paul helped bisect that the following llvm commit is
responsible:

552c6c232872 ("PR44406: Follow behavior of array bound constant
              folding in more recent versions of GCC.")

Basically, before the above commit, clang frontend did constant
folding for array size "AA + sizeof(struct t) + a" to be 68,
so used alloca for stack allocation. After the above commit,
clang frontend didn't do constant folding for array size
any more, which results in a VLA and llvm.stacksave/llvm.stackrestore
is generated.

BPF architecture API does not support stack pointer (sp) register.
The LLVM internally used R11 to indicate sp register but it should
not be in the final code. Otherwise, kernel verifier will reject it.

The early patch ([2]) tried to fix the issue in clang frontend.
But the upstream discussion considered frontend fix is really a
hack and the backend should properly undo llvm.stacksave/llvm.stackrestore.
This patch implemented a bpf IR phase to remove these intrinsics
unconditionally. If eventually the alloca can be resolved with
constant size, r11 will not be generated. If alloca cannot be
resolved with constant size, SelectionDag will complain, the same
as without this patch.

 [1] https://lore.kernel.org/bpf/20210809151202.GB1012999@Mem/
 [2] https://reviews.llvm.org/D107882

Differential Revision: https://reviews.llvm.org/D111897

Added: 
    llvm/lib/Target/BPF/BPFIRPeephole.cpp
    llvm/test/CodeGen/BPF/vla.ll

Modified: 
    llvm/lib/Target/BPF/BPF.h
    llvm/lib/Target/BPF/BPFTargetMachine.cpp
    llvm/lib/Target/BPF/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index a98a3e08d5de7..89990f7e15c28 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -21,6 +21,7 @@ ModulePass *createBPFCheckAndAdjustIR();
 
 FunctionPass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
 FunctionPass *createBPFPreserveDIType();
+FunctionPass *createBPFIRPeephole();
 FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
 FunctionPass *createBPFMISimplifyPatchablePass();
 FunctionPass *createBPFMIPeepholePass();
@@ -33,6 +34,7 @@ void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
 
 void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &);
 void initializeBPFPreserveDITypePass(PassRegistry&);
+void initializeBPFIRPeepholePass(PassRegistry&);
 void initializeBPFMISimplifyPatchablePass(PassRegistry&);
 void initializeBPFMIPeepholePass(PassRegistry&);
 void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
@@ -57,6 +59,13 @@ class BPFPreserveDITypePass : public PassInfoMixin<BPFPreserveDITypePass> {
   static bool isRequired() { return true; }
 };
 
+class BPFIRPeepholePass : public PassInfoMixin<BPFIRPeepholePass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
+};
+
 class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
 public:
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

diff  --git a/llvm/lib/Target/BPF/BPFIRPeephole.cpp b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
new file mode 100644
index 0000000000000..9192205105e33
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
@@ -0,0 +1,115 @@
+//===------------ BPFIRPeephole.cpp - IR Peephole Transformation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// IR level peephole optimization, specifically removing @llvm.stacksave() and
+// @llvm.stackrestore().
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+
+#define DEBUG_TYPE "bpf-ir-peephole"
+
+using namespace llvm;
+
+namespace {
+
+static bool BPFIRPeepholeImpl(Function &F) {
+  LLVM_DEBUG(dbgs() << "******** BPF IR Peephole ********\n");
+
+  Instruction *ToErase = nullptr;
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      // The following code pattern is handled:
+      //     %3 = call i8* @llvm.stacksave()
+      //     store i8* %3, i8** %saved_stack, align 8
+      //     ...
+      //     %4 = load i8*, i8** %saved_stack, align 8
+      //     call void @llvm.stackrestore(i8* %4)
+      //     ...
+      // The goal is to remove the above four instructions,
+      // so we won't have instructions with r11 (stack pointer)
+      // if eventually there is no variable length stack allocation.
+      // InstrCombine also tries to remove the above instructions,
+      // if it is proven safe (constant alloca etc.), but depending
+      // on code pattern, it may still miss some.
+      //
+      // With unconditionally removing these instructions, if alloca is
+      // constant, we are okay then. Otherwise, SelectionDag will complain
+      // since BPF does not support dynamic allocation yet.
+      if (ToErase) {
+        ToErase->eraseFromParent();
+        ToErase = nullptr;
+      }
+
+      if (auto *Call = dyn_cast<CallInst>(&I)) {
+        if (auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand())) {
+          if (!GV->getName().equals("llvm.stacksave"))
+            continue;
+          if (!Call->hasOneUser())
+            continue;
+          auto *Inst = cast<Instruction>(*Call->user_begin());
+          LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
+          LLVM_DEBUG(dbgs() << "Remove:"; Inst->dump(); dbgs() << '\n');
+          Inst->eraseFromParent();
+          ToErase = &I;
+        }
+        continue;
+      }
+
+      if (auto *LD = dyn_cast<LoadInst>(&I)) {
+        if (!LD->hasOneUser())
+          continue;
+        auto *Call = dyn_cast<CallInst>(*LD->user_begin());
+        if (!Call)
+          continue;
+        auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+        if (!GV)
+          continue;
+        if (!GV->getName().equals("llvm.stackrestore"))
+          continue;
+        LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
+        LLVM_DEBUG(dbgs() << "Remove:"; Call->dump(); dbgs() << '\n');
+        Call->eraseFromParent();
+        ToErase = &I;
+      }
+    }
+  }
+
+  return false;
+}
+
+class BPFIRPeephole final : public FunctionPass {
+  bool runOnFunction(Function &F) override;
+
+public:
+  static char ID;
+  BPFIRPeephole() : FunctionPass(ID) {}
+};
+} // End anonymous namespace
+
+char BPFIRPeephole::ID = 0;
+INITIALIZE_PASS(BPFIRPeephole, DEBUG_TYPE, "BPF IR Peephole", false, false)
+
+FunctionPass *llvm::createBPFIRPeephole() { return new BPFIRPeephole(); }
+
+bool BPFIRPeephole::runOnFunction(Function &F) { return BPFIRPeepholeImpl(F); }
+
+PreservedAnalyses BPFIRPeepholePass::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  return BPFIRPeepholeImpl(F) ? PreservedAnalyses::none()
+                              : PreservedAnalyses::all();
+}

diff  --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index b383e4cace9e5..2fb76ab5c440e 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -43,6 +43,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
   PassRegistry &PR = *PassRegistry::getPassRegistry();
   initializeBPFAbstractMemberAccessLegacyPassPass(PR);
   initializeBPFPreserveDITypePass(PR);
+  initializeBPFIRPeepholePass(PR);
   initializeBPFAdjustOptPass(PR);
   initializeBPFCheckAndAdjustIRPass(PR);
   initializeBPFMIPeepholePass(PR);
@@ -107,6 +108,7 @@ void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
       [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
         PM.add(createBPFAbstractMemberAccess(this));
         PM.add(createBPFPreserveDIType());
+        PM.add(createBPFIRPeephole());
       });
 
   Builder.addExtension(
@@ -128,6 +130,7 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
         FunctionPassManager FPM;
         FPM.addPass(BPFAbstractMemberAccessPass(this));
         FPM.addPass(BPFPreserveDITypePass());
+        FPM.addPass(BPFIRPeepholePass());
         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
       });
   PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,

diff  --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index 2d804ca8a73eb..6ad0127e0dfe7 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_target(BPFCodeGen
   BPFCheckAndAdjustIR.cpp
   BPFFrameLowering.cpp
   BPFInstrInfo.cpp
+  BPFIRPeephole.cpp
   BPFISelDAGToDAG.cpp
   BPFISelLowering.cpp
   BPFMCInstLower.cpp

diff  --git a/llvm/test/CodeGen/BPF/vla.ll b/llvm/test/CodeGen/BPF/vla.ll
new file mode 100644
index 0000000000000..71eedb1d223bd
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/vla.ll
@@ -0,0 +1,115 @@
+; RUN: opt --bpf-ir-peephole -mtriple=bpf-pc-linux -S %s | FileCheck %s
+; Source:
+;   #define AA 40
+;   struct t {
+;     char a[20];
+;   };
+;   void foo(void *);
+;
+;   int test1() {
+;     const int a = 8;
+;     char tmp[AA + sizeof(struct t) + a];
+;     foo(tmp);
+;     return 0;
+;   }
+;
+;   int test2(int b) {
+;     const int a = 8;
+;     char tmp[a + b];
+;     foo(tmp);
+;     return 0;
+;   }
+; Compilation flag:
+;   clang -target bpf -O2 -S -emit-llvm t.c -Xclang -disable-llvm-passes
+
+source_filename = "t.c"
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "bpf"
+
+; Function Attrs: nounwind
+define dso_local i32 @test1() #0 {
+entry:
+  %a = alloca i32, align 4
+  %saved_stack = alloca i8*, align 8
+  %0 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4
+  store i32 8, i32* %a, align 4, !tbaa !3
+  %1 = call i8* @llvm.stacksave()
+  store i8* %1, i8** %saved_stack, align 8
+  %vla = alloca i8, i64 68, align 1
+  call void @foo(i8* %vla)
+  %2 = load i8*, i8** %saved_stack, align 8
+  call void @llvm.stackrestore(i8* %2)
+  %3 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %3) #4
+  ret i32 0
+}
+
+; CHECK:       define dso_local i32 @test1
+; CHECK-NOT:   %[[#]] = call i8* @llvm.stacksave()
+; CHECK-NOT:   store i8* %[[#]], i8** %saved_stack, align 8
+; CHECK-NOT:   %[[#]] = load i8*, i8** %saved_stack, align 8
+; CHECK-NOT:   call void @llvm.stackrestore(i8* %[[#]])
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nofree nosync nounwind willreturn
+declare i8* @llvm.stacksave() #2
+
+declare dso_local void @foo(i8*) #3
+
+; Function Attrs: nofree nosync nounwind willreturn
+declare void @llvm.stackrestore(i8*) #2
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind
+define dso_local i32 @test2(i32 %b) #0 {
+entry:
+  %b.addr = alloca i32, align 4
+  %a = alloca i32, align 4
+  %saved_stack = alloca i8*, align 8
+  %__vla_expr0 = alloca i64, align 8
+  store i32 %b, i32* %b.addr, align 4, !tbaa !3
+  %0 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4
+  store i32 8, i32* %a, align 4, !tbaa !3
+  %1 = load i32, i32* %b.addr, align 4, !tbaa !3
+  %add = add nsw i32 8, %1
+  %2 = zext i32 %add to i64
+  %3 = call i8* @llvm.stacksave()
+  store i8* %3, i8** %saved_stack, align 8
+  %vla = alloca i8, i64 %2, align 1
+  store i64 %2, i64* %__vla_expr0, align 8
+  call void @foo(i8* %vla)
+  %4 = load i8*, i8** %saved_stack, align 8
+  call void @llvm.stackrestore(i8* %4)
+  %5 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #4
+  ret i32 0
+}
+
+; CHECK:       define dso_local i32 @test2
+; CHECK-NOT:   %[[#]] = call i8* @llvm.stacksave()
+; CHECK-NOT:   store i8* %[[#]], i8** %saved_stack, align 8
+; CHECK-NOT:   %[[#]] = load i8*, i8** %saved_stack, align 8
+; CHECK-NOT:   call void @llvm.stackrestore(i8* %[[#]])
+
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { argmemonly nofree nosync nounwind willreturn }
+attributes #2 = { nofree nosync nounwind willreturn }
+attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #4 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 64c5d5c671fb5b5f25c464652a4eec2cf743af0d)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}


        


More information about the llvm-commits mailing list