[llvm] [llubi] Add support for common library function calls (PR #185645)
Zhige Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 06:17:54 PDT 2026
https://github.com/nofe1248 created https://github.com/llvm/llvm-project/pull/185645
Implements support for several common library function calls in llubi: printf, puts, malloc, calloc, free, exit, terminate, and abort.
Also partially refactors the source file structure to allow more code reuse.
From d3db9193e899192950a2422bdd1fffeb2922f039 Mon Sep 17 00:00:00 2001
From: Zhige Chen <zhige_chen at outlook.com>
Date: Tue, 10 Mar 2026 20:39:45 +0800
Subject: [PATCH] [llubi] Add support for common library function calls
---
llvm/test/tools/llubi/lib_cxx_memory.ll | 21 +
llvm/test/tools/llubi/lib_double_free.ll | 21 +
llvm/test/tools/llubi/lib_exit.ll | 30 +
llvm/test/tools/llubi/lib_io.ll | 37 +
llvm/test/tools/llubi/lib_memory.ll | 33 +
llvm/test/tools/llubi/lib_printf_format.ll | 51 +
llvm/test/tools/llubi/lib_uninit_string.ll | 18 +
llvm/tools/llubi/lib/CMakeLists.txt | 1 +
llvm/tools/llubi/lib/Context.h | 32 +-
llvm/tools/llubi/lib/Interpreter.cpp | 1894 ++++++++++----------
llvm/tools/llubi/lib/Interpreter.h | 211 +++
llvm/tools/llubi/lib/Library.cpp | 327 ++++
llvm/tools/llubi/lib/Library.h | 51 +
llvm/tools/llubi/llubi.cpp | 49 +-
14 files changed, 1789 insertions(+), 987 deletions(-)
create mode 100644 llvm/test/tools/llubi/lib_cxx_memory.ll
create mode 100644 llvm/test/tools/llubi/lib_double_free.ll
create mode 100644 llvm/test/tools/llubi/lib_exit.ll
create mode 100644 llvm/test/tools/llubi/lib_io.ll
create mode 100644 llvm/test/tools/llubi/lib_memory.ll
create mode 100644 llvm/test/tools/llubi/lib_printf_format.ll
create mode 100644 llvm/test/tools/llubi/lib_uninit_string.ll
create mode 100644 llvm/tools/llubi/lib/Interpreter.h
create mode 100644 llvm/tools/llubi/lib/Library.cpp
create mode 100644 llvm/tools/llubi/lib/Library.h
diff --git a/llvm/test/tools/llubi/lib_cxx_memory.ll b/llvm/test/tools/llubi/lib_cxx_memory.ll
new file mode 100644
index 0000000000000..7df4952e6a4c0
--- /dev/null
+++ b/llvm/test/tools/llubi/lib_cxx_memory.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: llubi --verbose < %s 2>&1 | FileCheck %s
+
+declare ptr @_Znwm(i64) ; new(unsigned long)
+declare void @_ZdlPv(ptr) ; delete(void*)
+
+define i32 @main() {
+entry:
+ %ptr = call ptr @_Znwm(i64 8)
+ store i64 42, ptr %ptr
+
+ call void @_ZdlPv(ptr %ptr)
+ ret i32 0
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %ptr = call ptr @_Znwm(i64 8) => ptr 0x10 [ptr]
+; CHECK-NEXT: store i64 42, ptr %ptr, align 4
+; CHECK-NEXT: call void @_ZdlPv(ptr %ptr)
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: Exiting function: main
+; CHECK-NEXT: Program exited with code 0
diff --git a/llvm/test/tools/llubi/lib_double_free.ll b/llvm/test/tools/llubi/lib_double_free.ll
new file mode 100644
index 0000000000000..2441d69f6628a
--- /dev/null
+++ b/llvm/test/tools/llubi/lib_double_free.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: not llubi --verbose < %s 2>&1 | FileCheck %s
+
+declare ptr @malloc(i64)
+declare void @free(ptr)
+
+define i32 @main() {
+entry:
+ %ptr = call ptr @malloc(i64 4)
+
+ call void @free(ptr %ptr)
+
+ call void @free(ptr %ptr)
+
+ ret i32 0
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %ptr = call ptr @malloc(i64 4) => ptr 0x10 [ptr]
+; CHECK-NEXT: call void @free(ptr %ptr)
+; CHECK-NEXT: Immediate UB detected: freeing an invalid, unallocated, or already freed pointer.
+; CHECK-NEXT: error: Execution of function 'main' failed.
diff --git a/llvm/test/tools/llubi/lib_exit.ll b/llvm/test/tools/llubi/lib_exit.ll
new file mode 100644
index 0000000000000..23bad47e22d85
--- /dev/null
+++ b/llvm/test/tools/llubi/lib_exit.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: not llubi --verbose < %s 2>&1 | FileCheck %s
+
+declare void @exit(i32)
+declare i32 @puts(ptr)
+
+define i32 @main() {
+entry:
+ %before = alloca [7 x i8]
+ store [7 x i8] c"Before\00", ptr %before
+
+ %after = alloca [6 x i8]
+ store [6 x i8] c"After\00", ptr %after
+
+ %0 = call i32 @puts(ptr %before)
+
+ call void @exit(i32 42)
+
+ %1 = call i32 @puts(ptr %after)
+
+ ret i32 0
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %before = alloca [7 x i8], align 1 => ptr 0x8 [before]
+; CHECK-NEXT: store [7 x i8] c"Before\00", ptr %before, align 1
+; CHECK-NEXT: %after = alloca [6 x i8], align 1 => ptr 0xF [after]
+; CHECK-NEXT: store [6 x i8] c"After\00", ptr %after, align 1
+; CHECK-NEXT: %0 = call i32 @puts(ptr %before) => i32 1
+; CHECK-NEXT: Program exited with code 42
+; CHECK-NEXT: Before
diff --git a/llvm/test/tools/llubi/lib_io.ll b/llvm/test/tools/llubi/lib_io.ll
new file mode 100644
index 0000000000000..875ebbd18a1be
--- /dev/null
+++ b/llvm/test/tools/llubi/lib_io.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: llubi --verbose < %s 2>&1 | FileCheck %s
+
+declare i32 @printf(ptr, ...)
+declare i32 @puts(ptr)
+
+define i32 @main() {
+entry:
+ %puts.str = alloca [13 x i8]
+ store [13 x i8] c"Hello, puts!\00", ptr %puts.str
+
+ %0 = call i32 @puts(ptr %puts.str)
+
+ %fmt.str = alloca [18 x i8]
+ store [18 x i8] c"Int: %d, Str: %s\0A\00", ptr %fmt.str
+
+ %arg.str = alloca [5 x i8]
+ store [5 x i8] c"test\00", ptr %arg.str
+
+ %1 = call i32 (ptr, ...) @printf(ptr %fmt.str, i32 42, ptr %arg.str)
+
+ ret i32 0
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %puts.str = alloca [13 x i8], align 1 => ptr 0x8 [puts.str]
+; CHECK-NEXT: store [13 x i8] c"Hello, puts!\00", ptr %puts.str, align 1
+; CHECK-NEXT: %0 = call i32 @puts(ptr %puts.str) => i32 1
+; CHECK-NEXT: %fmt.str = alloca [18 x i8], align 1 => ptr 0x15 [fmt.str]
+; CHECK-NEXT: store [18 x i8] c"Int: %d, Str: %s\0A\00", ptr %fmt.str, align 1
+; CHECK-NEXT: %arg.str = alloca [5 x i8], align 1 => ptr 0x27 [arg.str]
+; CHECK-NEXT: store [5 x i8] c"test\00", ptr %arg.str, align 1
+; CHECK-NEXT: %1 = call i32 (ptr, ...) @printf(ptr %fmt.str, i32 42, ptr %arg.str) => i32 19
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: Exiting function: main
+; CHECK-NEXT: Program exited with code 0
+; CHECK-NEXT: Hello, puts!
+; CHECK-NEXT: Int: 42, Str: test
diff --git a/llvm/test/tools/llubi/lib_memory.ll b/llvm/test/tools/llubi/lib_memory.ll
new file mode 100644
index 0000000000000..29babc50a61ee
--- /dev/null
+++ b/llvm/test/tools/llubi/lib_memory.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: llubi --verbose < %s 2>&1 | FileCheck %s
+
+declare ptr @malloc(i64)
+declare ptr @calloc(i64, i64)
+declare void @free(ptr)
+
+define i32 @main() {
+entry:
+ %ptr1 = call ptr @malloc(i64 4)
+ store i32 100, ptr %ptr1
+
+ %ptr2 = call ptr @calloc(i64 1, i64 4)
+
+ %val1 = load i32, ptr %ptr1
+ %val2 = load i32, ptr %ptr2
+
+ call void @free(ptr %ptr1)
+ call void @free(ptr %ptr2)
+
+ ret i32 0
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %ptr1 = call ptr @malloc(i64 4) => ptr 0x10 [ptr1]
+; CHECK-NEXT: store i32 100, ptr %ptr1, align 4
+; CHECK-NEXT: %ptr2 = call ptr @calloc(i64 1, i64 4) => ptr 0x20 [ptr2]
+; CHECK-NEXT: %val1 = load i32, ptr %ptr1, align 4 => i32 100
+; CHECK-NEXT: %val2 = load i32, ptr %ptr2, align 4 => i32 0
+; CHECK-NEXT: call void @free(ptr %ptr1)
+; CHECK-NEXT: call void @free(ptr %ptr2)
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: Exiting function: main
+; CHECK-NEXT: Program exited with code 0
diff --git a/llvm/test/tools/llubi/lib_printf_format.ll b/llvm/test/tools/llubi/lib_printf_format.ll
new file mode 100644
index 0000000000000..692c9fb501a9c
--- /dev/null
+++ b/llvm/test/tools/llubi/lib_printf_format.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: llubi --verbose < %s 2>&1 | FileCheck %s
+
+declare i32 @printf(ptr, ...)
+
+define i32 @main() {
+entry:
+ %fmt_int = alloca [36 x i8]
+ store [36 x i8] c"Ints: %d, %i, %u, %o, %x, %X, %05d\0A\00", ptr %fmt_int
+
+ %fmt_len = alloca [35 x i8]
+ store [35 x i8] c"Lengths: %ld, %lld, %hd, %hhu, %c\0A\00", ptr %fmt_len
+
+ %fmt_str_ptr = alloca [18 x i8]
+ store [18 x i8] c"Str: %s, Ptr: %p\0A\00", ptr %fmt_str_ptr
+
+ %fmt_pct = alloca [15 x i8]
+ store [15 x i8] c"Percent: %d%%\0A\00", ptr %fmt_pct
+
+ %dummy_str = alloca [6 x i8]
+ store [6 x i8] c"llubi\00", ptr %dummy_str
+
+ call i32 (ptr, ...) @printf(ptr %fmt_int, i32 42, i32 -42, i32 255, i32 255, i32 255, i32 255, i32 42)
+ call i32 (ptr, ...) @printf(ptr %fmt_len, i64 123456789, i64 987654321, i32 100, i32 50, i32 65)
+ call i32 (ptr, ...) @printf(ptr %fmt_str_ptr, ptr %dummy_str, ptr %dummy_str)
+ call i32 (ptr, ...) @printf(ptr %fmt_pct, i32 100)
+
+ ret i32 0
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %fmt_int = alloca [36 x i8], align 1 => ptr 0x8 [fmt_int]
+; CHECK-NEXT: store [36 x i8] c"Ints: %d, %i, %u, %o, %x, %X, %05d\0A\00", ptr %fmt_int, align 1
+; CHECK-NEXT: %fmt_len = alloca [35 x i8], align 1 => ptr 0x2C [fmt_len]
+; CHECK-NEXT: store [35 x i8] c"Lengths: %ld, %lld, %hd, %hhu, %c\0A\00", ptr %fmt_len, align 1
+; CHECK-NEXT: %fmt_str_ptr = alloca [18 x i8], align 1 => ptr 0x4F [fmt_str_ptr]
+; CHECK-NEXT: store [18 x i8] c"Str: %s, Ptr: %p\0A\00", ptr %fmt_str_ptr, align 1
+; CHECK-NEXT: %fmt_pct = alloca [15 x i8], align 1 => ptr 0x61 [fmt_pct]
+; CHECK-NEXT: store [15 x i8] c"Percent: %d%%\0A\00", ptr %fmt_pct, align 1
+; CHECK-NEXT: %dummy_str = alloca [6 x i8], align 1 => ptr 0x70 [dummy_str]
+; CHECK-NEXT: store [6 x i8] c"llubi\00", ptr %dummy_str, align 1
+; CHECK-NEXT: %0 = call i32 (ptr, ...) @printf(ptr %fmt_int, i32 42, i32 -42, i32 255, i32 255, i32 255, i32 255, i32 42) => i32 39
+; CHECK-NEXT: %1 = call i32 (ptr, ...) @printf(ptr %fmt_len, i64 123456789, i64 987654321, i32 100, i32 50, i32 65) => i32 42
+; CHECK-NEXT: %2 = call i32 (ptr, ...) @printf(ptr %fmt_str_ptr, ptr %dummy_str, ptr %dummy_str) => i32 22
+; CHECK-NEXT: %3 = call i32 (ptr, ...) @printf(ptr %fmt_pct, i32 100) => i32 14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: Exiting function: main
+; CHECK-NEXT: Program exited with code 0
+; CHECK-NEXT: Ints: 42, -42, 255, 377, ff, FF, 00042
+; CHECK-NEXT: Lengths: 123456789, 987654321, 100, 50, A
+; CHECK-NEXT: Str: llubi, Ptr: 0x70
+; CHECK-NEXT: Percent: 100%
diff --git a/llvm/test/tools/llubi/lib_uninit_string.ll b/llvm/test/tools/llubi/lib_uninit_string.ll
new file mode 100644
index 0000000000000..7274cfdb63363
--- /dev/null
+++ b/llvm/test/tools/llubi/lib_uninit_string.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: not llubi --verbose < %s 2>&1 | FileCheck %s
+
+declare ptr @malloc(i64)
+declare i32 @puts(ptr)
+
+define i32 @main() {
+entry:
+ %ptr = call ptr @malloc(i64 10)
+
+ %1 = call i32 @puts(ptr %ptr)
+
+ ret i32 0
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %ptr = call ptr @malloc(i64 10) => ptr 0x10 [ptr]
+; CHECK-NEXT: Immediate UB detected: Read uninitialized or poison memory while parsing C-string.
+; CHECK-NEXT: error: Execution of function 'main' failed.
diff --git a/llvm/tools/llubi/lib/CMakeLists.txt b/llvm/tools/llubi/lib/CMakeLists.txt
index d3b54d0bd45b5..af60e43b32e0b 100644
--- a/llvm/tools/llubi/lib/CMakeLists.txt
+++ b/llvm/tools/llubi/lib/CMakeLists.txt
@@ -8,5 +8,6 @@ add_llvm_library(LLVMUBAwareInterpreter
STATIC
Context.cpp
Interpreter.cpp
+ Library.cpp
Value.cpp
)
diff --git a/llvm/tools/llubi/lib/Context.h b/llvm/tools/llubi/lib/Context.h
index a250004b3cb54..735fd4dc38f6b 100644
--- a/llvm/tools/llubi/lib/Context.h
+++ b/llvm/tools/llubi/lib/Context.h
@@ -14,6 +14,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Module.h"
#include <map>
+#include <optional>
#include <random>
namespace llvm::ubi {
@@ -47,6 +48,24 @@ enum class UndefValueBehavior {
Zero, // All uses of the undef value yield zero.
};
+enum class ProgramExitKind {
+ Returned,
+ Exit,
+ Abort,
+ Terminate,
+};
+
+enum class ExecutionStatus {
+ Completed,
+ ProgramExited,
+ Failed,
+};
+
+struct ProgramExitInfo {
+ ProgramExitKind Kind;
+ uint64_t ExitCode;
+};
+
class MemoryObject : public RefCountedBase<MemoryObject> {
uint64_t Address;
uint64_t Size;
@@ -110,6 +129,9 @@ class EventHandler {
virtual bool onFunctionExit(Function &F, const AnyValue &RetVal) {
return true;
}
+ virtual bool onProgramExit(ProgramExitKind Kind, uint64_t ExitCode = 0) {
+ return true;
+ }
virtual bool onPrint(StringRef Msg) {
outs() << Msg;
return true;
@@ -238,10 +260,12 @@ class Context {
bool initGlobalValues();
/// Execute the function \p F with arguments \p Args, and store the return
/// value in \p RetVal if the function is not void.
- /// Returns true if the function executed successfully. False indicates an
- /// error occurred during execution.
- bool runFunction(Function &F, ArrayRef<AnyValue> Args, AnyValue &RetVal,
- EventHandler &Handler);
+ /// Returns how execution ended. If it ended via a library-triggered program
+ /// termination (e.g., exit/abort/terminate), \p ExitInfo is populated.
+ ExecutionStatus runFunction(Function &F, ArrayRef<AnyValue> Args,
+ AnyValue &RetVal, EventHandler &Handler,
+ std::optional<ProgramExitInfo> *ExitInfo =
+ nullptr);
};
} // namespace llvm::ubi
diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp
index dd5530a355538..b0d680d8a21f8 100644
--- a/llvm/tools/llubi/lib/Interpreter.cpp
+++ b/llvm/tools/llubi/lib/Interpreter.cpp
@@ -10,11 +10,10 @@
//
//===----------------------------------------------------------------------===//
-#include "Context.h"
-#include "Value.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "Interpreter.h"
+#include "Library.h"
+
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
@@ -23,65 +22,6 @@ namespace llvm::ubi {
using namespace PatternMatch;
-enum class FrameState {
- // It is about to enter the function.
- // Valid transition:
- // -> Running
- Entry,
- // It is executing instructions inside the function.
- // Valid transitions:
- // -> Pending (on call)
- // -> Exit (on return)
- Running,
- // It is about to enter a callee or handle return value from the callee.
- // Valid transitions:
- // -> Running (after returning from callee)
- Pending,
- // It is about to return the control to the caller.
- Exit,
-};
-
-/// Context for a function call.
-/// This struct maintains the state during the execution of a function,
-/// including the control flow, values of executed instructions, and stack
-/// objects.
-struct Frame {
- Function &Func;
- Frame *LastFrame;
- CallBase *CallSite;
- ArrayRef<AnyValue> Args;
- AnyValue &RetVal;
-
- TargetLibraryInfo TLI;
- BasicBlock *BB;
- BasicBlock::iterator PC;
- FrameState State = FrameState::Entry;
- // Stack objects allocated in this frame. They will be automatically freed
- // when the function returns.
- SmallVector<IntrusiveRefCntPtr<MemoryObject>> Allocas;
- // Values of arguments and executed instructions in this function.
- DenseMap<Value *, AnyValue> ValueMap;
-
- // Reserved for in-flight subroutines.
- Function *ResolvedCallee = nullptr;
- SmallVector<AnyValue> CalleeArgs;
- AnyValue CalleeRetVal;
-
- Frame(Function &F, CallBase *CallSite, Frame *LastFrame,
- ArrayRef<AnyValue> Args, AnyValue &RetVal,
- const TargetLibraryInfoImpl &TLIImpl)
- : Func(F), LastFrame(LastFrame), CallSite(CallSite), Args(Args),
- RetVal(RetVal), TLI(TLIImpl, &F) {
- assert((Args.size() == F.arg_size() ||
- (F.isVarArg() && Args.size() >= F.arg_size())) &&
- "Expected enough arguments to call the function.");
- BB = &Func.getEntryBlock();
- PC = BB->begin();
- for (Argument &Arg : F.args())
- ValueMap[&Arg] = Args[Arg.getArgNo()];
- }
-};
-
static AnyValue addNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
bool HasNUW) {
APInt Res = LHS + RHS;
@@ -118,1064 +58,1062 @@ static AnyValue mulNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
return Res;
}
-/// Instruction executor using the visitor pattern.
-/// Unlike the Context class that manages the global state,
-/// InstExecutor only maintains the state for call frames.
-class InstExecutor : public InstVisitor<InstExecutor, void> {
- Context &Ctx;
- const DataLayout &DL;
- EventHandler &Handler;
- std::list<Frame> CallStack;
- // Used to indicate whether the interpreter should continue execution.
- bool Status;
- Frame *CurrentFrame = nullptr;
- AnyValue None;
-
- void reportImmediateUB(StringRef Msg) {
- // Check if we have already reported an immediate UB.
- if (!Status)
- return;
- Status = false;
- // TODO: Provide stack trace information.
- Handler.onImmediateUB(Msg);
- }
+void InstExecutor::reportImmediateUB(StringRef Msg) {
+ // Check if we have already reported an immediate UB.
+ if (!Status)
+ return;
+ Status = false;
+ // TODO: Provide stack trace information.
+ Handler.onImmediateUB(Msg);
+}
- void reportError(StringRef Msg) {
- // Check if we have already reported an error message.
- if (!Status)
- return;
- Status = false;
- Handler.onError(Msg);
- }
+void InstExecutor::reportError(StringRef Msg) {
+ // Check if we have already reported an error message.
+ if (!Status)
+ return;
+ Status = false;
+ Handler.onError(Msg);
+}
- const AnyValue &getValue(Value *V) {
- if (auto *C = dyn_cast<Constant>(V))
- return Ctx.getConstantValue(C);
- return CurrentFrame->ValueMap.at(V);
- }
+const AnyValue &InstExecutor::getValue(Value *V) {
+ if (auto *C = dyn_cast<Constant>(V))
+ return Ctx.getConstantValue(C);
+ return CurrentFrame->ValueMap.at(V);
+}
- void setResult(Instruction &I, AnyValue V) {
- if (Status)
- Status &= Handler.onInstructionExecuted(I, V);
- CurrentFrame->ValueMap.insert_or_assign(&I, std::move(V));
- }
+void InstExecutor::setResult(Instruction &I, AnyValue V) {
+ if (Status)
+ Status &= Handler.onInstructionExecuted(I, V);
+ CurrentFrame->ValueMap.insert_or_assign(&I, std::move(V));
+}
- AnyValue computeUnOp(Type *Ty, const AnyValue &Operand,
- function_ref<AnyValue(const AnyValue &)> ScalarFn) {
- if (Ty->isVectorTy()) {
- auto &OperandVec = Operand.asAggregate();
- std::vector<AnyValue> ResVec;
- ResVec.reserve(OperandVec.size());
- for (const auto &Scalar : OperandVec)
- ResVec.push_back(ScalarFn(Scalar));
- return std::move(ResVec);
- }
- return ScalarFn(Operand);
+AnyValue
+InstExecutor::computeUnOp(Type *Ty, const AnyValue &Operand,
+ function_ref<AnyValue(const AnyValue &)> ScalarFn) {
+ if (Ty->isVectorTy()) {
+ auto &OperandVec = Operand.asAggregate();
+ std::vector<AnyValue> ResVec;
+ ResVec.reserve(OperandVec.size());
+ for (const auto &Scalar : OperandVec)
+ ResVec.push_back(ScalarFn(Scalar));
+ return std::move(ResVec);
}
+ return ScalarFn(Operand);
+}
- void visitUnOp(Instruction &I,
- function_ref<AnyValue(const AnyValue &)> ScalarFn) {
- setResult(I, computeUnOp(I.getType(), getValue(I.getOperand(0)), ScalarFn));
- }
+void InstExecutor::visitUnOp(
+ Instruction &I, function_ref<AnyValue(const AnyValue &)> ScalarFn) {
+ setResult(I, computeUnOp(I.getType(), getValue(I.getOperand(0)), ScalarFn));
+}
- void visitIntUnOp(Instruction &I,
- function_ref<AnyValue(const APInt &)> ScalarFn) {
- visitUnOp(I, [&](const AnyValue &Operand) -> AnyValue {
- if (Operand.isPoison())
- return AnyValue::poison();
- return ScalarFn(Operand.asInteger());
- });
- }
+void InstExecutor::visitIntUnOp(
+ Instruction &I, function_ref<AnyValue(const APInt &)> ScalarFn) {
+ visitUnOp(I, [&](const AnyValue &Operand) -> AnyValue {
+ if (Operand.isPoison())
+ return AnyValue::poison();
+ return ScalarFn(Operand.asInteger());
+ });
+}
- AnyValue computeBinOp(
- Type *Ty, const AnyValue &LHS, const AnyValue &RHS,
- function_ref<AnyValue(const AnyValue &, const AnyValue &)> ScalarFn) {
- if (Ty->isVectorTy()) {
- auto &LHSVec = LHS.asAggregate();
- auto &RHSVec = RHS.asAggregate();
- std::vector<AnyValue> ResVec;
- ResVec.reserve(LHSVec.size());
- for (const auto &[ScalarLHS, ScalarRHS] : zip(LHSVec, RHSVec))
- ResVec.push_back(ScalarFn(ScalarLHS, ScalarRHS));
- return std::move(ResVec);
- }
- return ScalarFn(LHS, RHS);
+AnyValue InstExecutor::computeBinOp(
+ Type *Ty, const AnyValue &LHS, const AnyValue &RHS,
+ function_ref<AnyValue(const AnyValue &, const AnyValue &)> ScalarFn) {
+ if (Ty->isVectorTy()) {
+ auto &LHSVec = LHS.asAggregate();
+ auto &RHSVec = RHS.asAggregate();
+ std::vector<AnyValue> ResVec;
+ ResVec.reserve(LHSVec.size());
+ for (const auto &[ScalarLHS, ScalarRHS] : zip(LHSVec, RHSVec))
+ ResVec.push_back(ScalarFn(ScalarLHS, ScalarRHS));
+ return std::move(ResVec);
}
+ return ScalarFn(LHS, RHS);
+}
- void visitBinOp(
- Instruction &I,
- function_ref<AnyValue(const AnyValue &, const AnyValue &)> ScalarFn) {
- setResult(I, computeBinOp(I.getType(), getValue(I.getOperand(0)),
- getValue(I.getOperand(1)), ScalarFn));
- }
+void InstExecutor::visitBinOp(
+ Instruction &I,
+ function_ref<AnyValue(const AnyValue &, const AnyValue &)> ScalarFn) {
+ setResult(I, computeBinOp(I.getType(), getValue(I.getOperand(0)),
+ getValue(I.getOperand(1)), ScalarFn));
+}
- void
- visitIntBinOp(Instruction &I,
- function_ref<AnyValue(const APInt &, const APInt &)> ScalarFn) {
- visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
- if (LHS.isPoison() || RHS.isPoison())
- return AnyValue::poison();
- return ScalarFn(LHS.asInteger(), RHS.asInteger());
- });
- }
+void InstExecutor::visitIntBinOp(
+ Instruction &I,
+ function_ref<AnyValue(const APInt &, const APInt &)> ScalarFn) {
+ visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
+ if (LHS.isPoison() || RHS.isPoison())
+ return AnyValue::poison();
+ return ScalarFn(LHS.asInteger(), RHS.asInteger());
+ });
+}
- void jumpTo(Instruction &Terminator, BasicBlock *DestBB) {
- if (!Handler.onBBJump(Terminator, *DestBB)) {
- Status = false;
- return;
- }
- BasicBlock *From = CurrentFrame->BB;
- CurrentFrame->BB = DestBB;
- CurrentFrame->PC = DestBB->begin();
- // Update PHI nodes in batch to avoid the interference between PHI nodes.
- // We need to store the incoming values into a temporary buffer.
- // Otherwise, the incoming value may be overwritten before it is
- // used by other PHI nodes.
- SmallVector<std::pair<PHINode *, AnyValue>> IncomingValues;
- PHINode *PHI = nullptr;
- while ((PHI = dyn_cast<PHINode>(CurrentFrame->PC))) {
- Value *Incoming = PHI->getIncomingValueForBlock(From);
- // TODO: handle fast-math flags.
- IncomingValues.emplace_back(PHI, getValue(Incoming));
- ++CurrentFrame->PC;
- }
- for (auto &[K, V] : IncomingValues)
- setResult(*K, std::move(V));
+void InstExecutor::jumpTo(Instruction &Terminator, BasicBlock *DestBB) {
+ if (!Handler.onBBJump(Terminator, *DestBB)) {
+ Status = false;
+ return;
}
-
- /// Helper function to determine whether an inline asm is a no-op, which is
- /// used to implement black_box style optimization blockers.
- bool isNoopInlineAsm(Value *V, Type *RetTy) {
- if (auto *Asm = dyn_cast<InlineAsm>(V))
- return Asm->getAsmString().empty() && RetTy->isVoidTy();
- return false;
+ BasicBlock *From = CurrentFrame->BB;
+ CurrentFrame->BB = DestBB;
+ CurrentFrame->PC = DestBB->begin();
+ // Update PHI nodes in batch to avoid the interference between PHI nodes.
+ // We need to store the incoming values into a temporary buffer.
+ // Otherwise, the incoming value may be overwritten before it is
+ // used by other PHI nodes.
+ SmallVector<std::pair<PHINode *, AnyValue>> IncomingValues;
+ PHINode *PHI = nullptr;
+ while ((PHI = dyn_cast<PHINode>(CurrentFrame->PC))) {
+ Value *Incoming = PHI->getIncomingValueForBlock(From);
+ // TODO: handle fast-math flags.
+ IncomingValues.emplace_back(PHI, getValue(Incoming));
+ ++CurrentFrame->PC;
}
+ for (auto &[K, V] : IncomingValues)
+ setResult(*K, std::move(V));
+}
- /// Check if the upcoming memory access is valid. Returns the offset relative
- /// to the underlying object if it is valid.
- std::optional<uint64_t> verifyMemAccess(const MemoryObject &MO,
- const APInt &Address,
- uint64_t AccessSize, Align Alignment,
- bool IsStore) {
- // Loading from a stack object outside its lifetime is not undefined
- // behavior and returns a poison value instead. Storing to it is still
- // undefined behavior.
- if (IsStore ? MO.getState() != MemoryObjectState::Alive
- : MO.getState() == MemoryObjectState::Freed) {
- reportImmediateUB("Try to access a dead memory object.");
- return std::nullopt;
- }
+/// Helper function to determine whether an inline asm is a no-op, which is
+/// used to implement black_box style optimization blockers.
+bool InstExecutor::isNoopInlineAsm(Value *V, Type *RetTy) {
+ if (auto *Asm = dyn_cast<InlineAsm>(V))
+ return Asm->getAsmString().empty() && RetTy->isVoidTy();
+ return false;
+}
- if (Address.countr_zero() < Log2(Alignment)) {
- reportImmediateUB("Misaligned memory access.");
- return std::nullopt;
- }
+/// Check if the upcoming memory access is valid. Returns the offset relative
+/// to the underlying object if it is valid.
+std::optional<uint64_t> InstExecutor::verifyMemAccess(const MemoryObject &MO,
+ const APInt &Address,
+ uint64_t AccessSize,
+ Align Alignment,
+ bool IsStore) {
+ // Loading from a stack object outside its lifetime is not undefined
+ // behavior and returns a poison value instead. Storing to it is still
+ // undefined behavior.
+ if (IsStore ? MO.getState() != MemoryObjectState::Alive
+ : MO.getState() == MemoryObjectState::Freed) {
+ reportImmediateUB("Try to access a dead memory object.");
+ return std::nullopt;
+ }
- if (AccessSize > MO.getSize() || Address.ult(MO.getAddress())) {
- reportImmediateUB("Memory access is out of bounds.");
- return std::nullopt;
- }
+ if (Address.countr_zero() < Log2(Alignment)) {
+ reportImmediateUB("Misaligned memory access.");
+ return std::nullopt;
+ }
- APInt Offset = Address - MO.getAddress();
+ if (AccessSize > MO.getSize() || Address.ult(MO.getAddress())) {
+ reportImmediateUB("Memory access is out of bounds.");
+ return std::nullopt;
+ }
- if (Offset.ugt(MO.getSize() - AccessSize)) {
- reportImmediateUB("Memory access is out of bounds.");
- return std::nullopt;
- }
+ APInt Offset = Address - MO.getAddress();
- return Offset.getZExtValue();
+ if (Offset.ugt(MO.getSize() - AccessSize)) {
+ reportImmediateUB("Memory access is out of bounds.");
+ return std::nullopt;
}
- AnyValue load(const AnyValue &Ptr, Align Alignment, Type *ValTy) {
- if (Ptr.isPoison()) {
- reportImmediateUB("Invalid memory access with a poison pointer.");
- return AnyValue::getPoisonValue(Ctx, ValTy);
- }
- auto &PtrVal = Ptr.asPointer();
- auto *MO = PtrVal.getMemoryObject();
- if (!MO) {
- reportImmediateUB(
- "Invalid memory access via a pointer with nullary provenance.");
- return AnyValue::getPoisonValue(Ctx, ValTy);
- }
- // TODO: pointer capability check
- if (auto Offset =
- verifyMemAccess(*MO, PtrVal.address(),
- Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
- /*IsStore=*/false)) {
- // Load from a dead stack object yields poison value.
- if (MO->getState() == MemoryObjectState::Dead)
- return AnyValue::getPoisonValue(Ctx, ValTy);
-
- return Ctx.load(*MO, *Offset, ValTy);
- }
+ return Offset.getZExtValue();
+}
+
+AnyValue InstExecutor::load(const AnyValue &Ptr, Align Alignment, Type *ValTy) {
+ if (Ptr.isPoison()) {
+ reportImmediateUB("Invalid memory access with a poison pointer.");
return AnyValue::getPoisonValue(Ctx, ValTy);
}
-
- void store(const AnyValue &Ptr, Align Alignment, const AnyValue &Val,
- Type *ValTy) {
- if (Ptr.isPoison()) {
- reportImmediateUB("Invalid memory access with a poison pointer.");
- return;
- }
- auto &PtrVal = Ptr.asPointer();
- auto *MO = PtrVal.getMemoryObject();
- if (!MO) {
- reportImmediateUB(
- "Invalid memory access via a pointer with nullary provenance.");
- return;
- }
- // TODO: pointer capability check
- if (auto Offset =
- verifyMemAccess(*MO, PtrVal.address(),
- Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
- /*IsStore=*/true))
- Ctx.store(*MO, *Offset, Val, ValTy);
+ auto &PtrVal = Ptr.asPointer();
+ auto *MO = PtrVal.getMemoryObject();
+ if (!MO) {
+ reportImmediateUB(
+ "Invalid memory access via a pointer with nullary provenance.");
+ return AnyValue::getPoisonValue(Ctx, ValTy);
}
+ // TODO: pointer capability check
+ if (auto Offset =
+ verifyMemAccess(*MO, PtrVal.address(),
+ Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
+ /*IsStore=*/false)) {
+ // Load from a dead stack object yields poison value.
+ if (MO->getState() == MemoryObjectState::Dead)
+ return AnyValue::getPoisonValue(Ctx, ValTy);
- AnyValue computePtrAdd(const Pointer &Ptr, const APInt &Offset,
- GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset) {
- if (Offset.isZero())
- return Ptr;
- APInt IndexBits = Ptr.address().trunc(Offset.getBitWidth());
- auto NewIndex = addNoWrap(IndexBits, Offset, /*HasNSW=*/false,
- Flags.hasNoUnsignedWrap());
- if (NewIndex.isPoison())
- return AnyValue::poison();
- if (Flags.hasNoUnsignedSignedWrap()) {
- // The successive addition of the current address, truncated to the
- // pointer index type and interpreted as an unsigned number, and each
- // offset, interpreted as a signed number, does not wrap the pointer index
- // type.
- if (Offset.isNonNegative() ? NewIndex.asInteger().ult(IndexBits)
- : NewIndex.asInteger().ugt(IndexBits))
- return AnyValue::poison();
- }
- APInt NewAddr = Ptr.address();
- NewAddr.insertBits(NewIndex.asInteger(), 0);
-
- auto *MO = Ptr.getMemoryObject();
- if (Flags.isInBounds() && (!MO || !MO->inBounds(NewAddr)))
- return AnyValue::poison();
-
- if (!AccumulatedOffset.isPoison()) {
- AccumulatedOffset =
- addNoWrap(AccumulatedOffset.asInteger(), Offset,
- Flags.hasNoUnsignedSignedWrap(), Flags.hasNoUnsignedWrap());
- if (AccumulatedOffset.isPoison())
- return AnyValue::poison();
- }
+ return Ctx.load(*MO, *Offset, ValTy);
+ }
+ return AnyValue::getPoisonValue(Ctx, ValTy);
+}
- // Should not expose provenance here even if the new address doesn't point
- // to the original object.
- return Ptr.getWithNewAddr(NewAddr);
+void InstExecutor::store(const AnyValue &Ptr, Align Alignment,
+ const AnyValue &Val, Type *ValTy) {
+ if (Ptr.isPoison()) {
+ reportImmediateUB("Invalid memory access with a poison pointer.");
+ return;
}
+ auto &PtrVal = Ptr.asPointer();
+ auto *MO = PtrVal.getMemoryObject();
+ if (!MO) {
+ reportImmediateUB(
+ "Invalid memory access via a pointer with nullary provenance.");
+ return;
+ }
+ // TODO: pointer capability check
+ if (auto Offset =
+ verifyMemAccess(*MO, PtrVal.address(),
+ Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
+ /*IsStore=*/true))
+ Ctx.store(*MO, *Offset, Val, ValTy);
+}
- AnyValue computePtrAdd(const AnyValue &Ptr, const APInt &Offset,
- GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset) {
- if (Ptr.isPoison())
+AnyValue InstExecutor::computePtrAdd(const Pointer &Ptr, const APInt &Offset,
+ GEPNoWrapFlags Flags,
+ AnyValue &AccumulatedOffset) {
+ if (Offset.isZero())
+ return Ptr;
+ APInt IndexBits = Ptr.address().trunc(Offset.getBitWidth());
+ auto NewIndex =
+ addNoWrap(IndexBits, Offset, /*HasNSW=*/false, Flags.hasNoUnsignedWrap());
+ if (NewIndex.isPoison())
+ return AnyValue::poison();
+ if (Flags.hasNoUnsignedSignedWrap()) {
+ // The successive addition of the current address, truncated to the
+ // pointer index type and interpreted as an unsigned number, and each
+ // offset, interpreted as a signed number, does not wrap the pointer index
+ // type.
+ if (Offset.isNonNegative() ? NewIndex.asInteger().ult(IndexBits)
+ : NewIndex.asInteger().ugt(IndexBits))
return AnyValue::poison();
- return computePtrAdd(Ptr.asPointer(), Offset, Flags, AccumulatedOffset);
}
+ APInt NewAddr = Ptr.address();
+ NewAddr.insertBits(NewIndex.asInteger(), 0);
- AnyValue computeScaledPtrAdd(const AnyValue &Ptr, const AnyValue &Index,
- const APInt &Scale, GEPNoWrapFlags Flags,
- AnyValue &AccumulatedOffset) {
- if (Ptr.isPoison() || Index.isPoison())
- return AnyValue::poison();
- assert(Ptr.isPointer() && Index.isInteger() && "Unexpected type.");
- if (Scale.isOne())
- return computePtrAdd(Ptr, Index.asInteger(), Flags, AccumulatedOffset);
- auto ScaledOffset =
- mulNoWrap(Index.asInteger(), Scale, Flags.hasNoUnsignedSignedWrap(),
- Flags.hasNoUnsignedWrap());
- if (ScaledOffset.isPoison())
+ auto *MO = Ptr.getMemoryObject();
+ if (Flags.isInBounds() && (!MO || !MO->inBounds(NewAddr)))
+ return AnyValue::poison();
+
+ if (!AccumulatedOffset.isPoison()) {
+ AccumulatedOffset =
+ addNoWrap(AccumulatedOffset.asInteger(), Offset,
+ Flags.hasNoUnsignedSignedWrap(), Flags.hasNoUnsignedWrap());
+ if (AccumulatedOffset.isPoison())
return AnyValue::poison();
- return computePtrAdd(Ptr, ScaledOffset.asInteger(), Flags,
- AccumulatedOffset);
}
- AnyValue canonicalizeIndex(const AnyValue &Idx, unsigned IndexBitWidth,
- GEPNoWrapFlags Flags) {
- if (Idx.isPoison())
- return AnyValue::poison();
- auto &IdxInt = Idx.asInteger();
- if (IdxInt.getBitWidth() == IndexBitWidth)
- return Idx;
- if (IdxInt.getBitWidth() > IndexBitWidth) {
- if (Flags.hasNoUnsignedSignedWrap() &&
- !IdxInt.isSignedIntN(IndexBitWidth))
- return AnyValue::poison();
+ // Should not expose provenance here even if the new address doesn't point
+ // to the original object.
+ return Ptr.getWithNewAddr(NewAddr);
+}
- if (Flags.hasNoUnsignedWrap() && !IdxInt.isIntN(IndexBitWidth))
- return AnyValue::poison();
+AnyValue InstExecutor::computePtrAdd(const AnyValue &Ptr, const APInt &Offset,
+ GEPNoWrapFlags Flags,
+ AnyValue &AccumulatedOffset) {
+ if (Ptr.isPoison())
+ return AnyValue::poison();
+ return computePtrAdd(Ptr.asPointer(), Offset, Flags, AccumulatedOffset);
+}
- return IdxInt.trunc(IndexBitWidth);
- }
- return IdxInt.sext(IndexBitWidth);
- }
+AnyValue InstExecutor::computeScaledPtrAdd(const AnyValue &Ptr,
+ const AnyValue &Index,
+ const APInt &Scale,
+ GEPNoWrapFlags Flags,
+ AnyValue &AccumulatedOffset) {
+ if (Ptr.isPoison() || Index.isPoison())
+ return AnyValue::poison();
+ assert(Ptr.isPointer() && Index.isInteger() && "Unexpected type.");
+ if (Scale.isOne())
+ return computePtrAdd(Ptr, Index.asInteger(), Flags, AccumulatedOffset);
+ auto ScaledOffset =
+ mulNoWrap(Index.asInteger(), Scale, Flags.hasNoUnsignedSignedWrap(),
+ Flags.hasNoUnsignedWrap());
+ if (ScaledOffset.isPoison())
+ return AnyValue::poison();
+ return computePtrAdd(Ptr, ScaledOffset.asInteger(), Flags, AccumulatedOffset);
+}
-public:
- InstExecutor(Context &C, EventHandler &H, Function &F,
- ArrayRef<AnyValue> Args, AnyValue &RetVal)
- : Ctx(C), DL(Ctx.getDataLayout()), Handler(H), Status(true) {
- CallStack.emplace_back(F, /*CallSite=*/nullptr, /*LastFrame=*/nullptr, Args,
- RetVal, Ctx.getTLIImpl());
- }
+AnyValue InstExecutor::canonicalizeIndex(const AnyValue &Idx,
+ unsigned IndexBitWidth,
+ GEPNoWrapFlags Flags) {
+ if (Idx.isPoison())
+ return AnyValue::poison();
+ auto &IdxInt = Idx.asInteger();
+ if (IdxInt.getBitWidth() == IndexBitWidth)
+ return Idx;
+ if (IdxInt.getBitWidth() > IndexBitWidth) {
+ if (Flags.hasNoUnsignedSignedWrap() && !IdxInt.isSignedIntN(IndexBitWidth))
+ return AnyValue::poison();
- void visitReturnInst(ReturnInst &RI) {
- if (auto *RV = RI.getReturnValue())
- CurrentFrame->RetVal = getValue(RV);
- CurrentFrame->State = FrameState::Exit;
- Status &= Handler.onInstructionExecuted(RI, None);
- }
+ if (Flags.hasNoUnsignedWrap() && !IdxInt.isIntN(IndexBitWidth))
+ return AnyValue::poison();
- void visitBranchInst(BranchInst &BI) {
- if (BI.isConditional()) {
- switch (getValue(BI.getCondition()).asBoolean()) {
- case BooleanKind::True:
- jumpTo(BI, BI.getSuccessor(0));
- return;
- case BooleanKind::False:
- jumpTo(BI, BI.getSuccessor(1));
- return;
- case BooleanKind::Poison:
- reportImmediateUB("Branch on poison condition.");
- return;
- }
- }
- jumpTo(BI, BI.getSuccessor(0));
+ return IdxInt.trunc(IndexBitWidth);
}
+ return IdxInt.sext(IndexBitWidth);
+}
+
+void InstExecutor::visitReturnInst(ReturnInst &RI) {
+ if (auto *RV = RI.getReturnValue())
+ CurrentFrame->RetVal = getValue(RV);
+ CurrentFrame->State = FrameState::Exit;
+ Status &= Handler.onInstructionExecuted(RI, None);
+}
- void visitSwitchInst(SwitchInst &SI) {
- auto &Cond = getValue(SI.getCondition());
- if (Cond.isPoison()) {
- reportImmediateUB("Switch on poison condition.");
+void InstExecutor::visitBranchInst(BranchInst &BI) {
+ if (BI.isConditional()) {
+ switch (getValue(BI.getCondition()).asBoolean()) {
+ case BooleanKind::True:
+ jumpTo(BI, BI.getSuccessor(0));
+ return;
+ case BooleanKind::False:
+ jumpTo(BI, BI.getSuccessor(1));
+ return;
+ case BooleanKind::Poison:
+ reportImmediateUB("Branch on poison condition.");
return;
}
- for (auto &Case : SI.cases()) {
- if (Case.getCaseValue()->getValue() == Cond.asInteger()) {
- jumpTo(SI, Case.getCaseSuccessor());
- return;
- }
- }
- jumpTo(SI, SI.getDefaultDest());
}
+ jumpTo(BI, BI.getSuccessor(0));
+}
- void visitUnreachableInst(UnreachableInst &) {
- reportImmediateUB("Unreachable code.");
+void InstExecutor::visitSwitchInst(SwitchInst &SI) {
+ auto &Cond = getValue(SI.getCondition());
+ if (Cond.isPoison()) {
+ reportImmediateUB("Switch on poison condition.");
+ return;
}
-
- void visitCallBrInst(CallBrInst &CI) {
- if (isNoopInlineAsm(CI.getCalledOperand(), CI.getType())) {
- jumpTo(CI, CI.getDefaultDest());
+ for (auto &Case : SI.cases()) {
+ if (Case.getCaseValue()->getValue() == Cond.asInteger()) {
+ jumpTo(SI, Case.getCaseSuccessor());
return;
}
-
- Handler.onUnrecognizedInstruction(CI);
- Status = false;
}
+ jumpTo(SI, SI.getDefaultDest());
+}
- void visitIndirectBrInst(IndirectBrInst &IBI) {
- auto &Target = getValue(IBI.getAddress());
- if (Target.isPoison()) {
- reportImmediateUB("Indirect branch on poison.");
- return;
- }
- if (BasicBlock *DestBB = Ctx.getTargetBlock(Target.asPointer())) {
- if (any_of(IBI.successors(),
- [DestBB](BasicBlock *Succ) { return Succ == DestBB; }))
- jumpTo(IBI, DestBB);
- else
- reportImmediateUB("Indirect branch on unlisted target BB.");
+void InstExecutor::visitUnreachableInst(UnreachableInst &) {
+ reportImmediateUB("Unreachable code.");
+}
- return;
- }
- reportImmediateUB("Indirect branch on invalid target BB.");
+void InstExecutor::visitCallBrInst(CallBrInst &CI) {
+ if (isNoopInlineAsm(CI.getCalledOperand(), CI.getType())) {
+ jumpTo(CI, CI.getDefaultDest());
+ return;
}
- void returnFromCallee() {
- // TODO: handle retval attributes (Attributes from known callee should be
- // applied if available).
- // TODO: handle metadata
- auto &CB = cast<CallBase>(*CurrentFrame->PC);
- CurrentFrame->CalleeArgs.clear();
- AnyValue &RetVal = CurrentFrame->CalleeRetVal;
- setResult(CB, std::move(RetVal));
+ Handler.onUnrecognizedInstruction(CI);
+ Status = false;
+}
- if (auto *II = dyn_cast<InvokeInst>(&CB))
- jumpTo(*II, II->getNormalDest());
- else if (CurrentFrame->State == FrameState::Pending)
- ++CurrentFrame->PC;
+void InstExecutor::visitIndirectBrInst(IndirectBrInst &IBI) {
+ auto &Target = getValue(IBI.getAddress());
+ if (Target.isPoison()) {
+ reportImmediateUB("Indirect branch on poison.");
+ return;
+ }
+ if (BasicBlock *DestBB = Ctx.getTargetBlock(Target.asPointer())) {
+ if (any_of(IBI.successors(),
+ [DestBB](BasicBlock *Succ) { return Succ == DestBB; }))
+ jumpTo(IBI, DestBB);
+ else
+ reportImmediateUB("Indirect branch on unlisted target BB.");
+
+ return;
}
+ reportImmediateUB("Indirect branch on invalid target BB.");
+}
- AnyValue callIntrinsic(CallBase &CB) {
- Intrinsic::ID IID = CB.getIntrinsicID();
- switch (IID) {
- case Intrinsic::assume:
- switch (getValue(CB.getArgOperand(0)).asBoolean()) {
- case BooleanKind::True:
- break;
- case BooleanKind::False:
- case BooleanKind::Poison:
- reportImmediateUB("Assume on false or poison condition.");
- break;
- }
- // TODO: handle llvm.assume with operand bundles
- return AnyValue();
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end: {
- auto *Ptr = CB.getArgOperand(0);
- if (isa<PoisonValue>(Ptr))
- return AnyValue();
- auto *MO = getValue(Ptr).asPointer().getMemoryObject();
- assert(MO && "Memory object accessed by lifetime intrinsic should be "
- "always valid.");
- if (IID == Intrinsic::lifetime_start) {
- MO->setState(MemoryObjectState::Alive);
- fill(MO->getBytes(), Byte::undef());
- } else {
- MO->setState(MemoryObjectState::Dead);
- }
- return AnyValue();
+void InstExecutor::returnFromCallee() {
+ // TODO: handle retval attributes (Attributes from known callee should be
+ // applied if available).
+ // TODO: handle metadata
+ auto &CB = cast<CallBase>(*CurrentFrame->PC);
+ CurrentFrame->CalleeArgs.clear();
+ AnyValue &RetVal = CurrentFrame->CalleeRetVal;
+ setResult(CB, std::move(RetVal));
+
+ if (auto *II = dyn_cast<InvokeInst>(&CB))
+ jumpTo(*II, II->getNormalDest());
+ else if (CurrentFrame->State == FrameState::Pending)
+ ++CurrentFrame->PC;
+}
+
+AnyValue InstExecutor::callIntrinsic(CallBase &CB) {
+ Intrinsic::ID IID = CB.getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::assume:
+ switch (getValue(CB.getArgOperand(0)).asBoolean()) {
+ case BooleanKind::True:
+ break;
+ case BooleanKind::False:
+ case BooleanKind::Poison:
+ reportImmediateUB("Assume on false or poison condition.");
+ break;
}
- default:
- Handler.onUnrecognizedInstruction(CB);
- Status = false;
+ // TODO: handle llvm.assume with operand bundles
+ return AnyValue();
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ auto *Ptr = CB.getArgOperand(0);
+ if (isa<PoisonValue>(Ptr))
return AnyValue();
+ auto *MO = getValue(Ptr).asPointer().getMemoryObject();
+ assert(MO && "Memory object accessed by lifetime intrinsic should be "
+ "always valid.");
+ if (IID == Intrinsic::lifetime_start) {
+ MO->setState(MemoryObjectState::Alive);
+ fill(MO->getBytes(), Byte::undef());
+ } else {
+ MO->setState(MemoryObjectState::Dead);
}
+ return AnyValue();
}
+ default:
+ Handler.onUnrecognizedInstruction(CB);
+ Status = false;
+ return AnyValue();
+ }
+}
- AnyValue callLibFunc(CallBase &CB, Function *ResolvedCallee) {
- LibFunc LF;
- // Respect nobuiltin attributes on call site.
- if (CB.isNoBuiltin() ||
- !CurrentFrame->TLI.getLibFunc(*ResolvedCallee, LF)) {
- Handler.onUnrecognizedInstruction(CB);
- Status = false;
- return AnyValue();
+AnyValue InstExecutor::callLibFunc(CallBase &CB, Function *ResolvedCallee) {
+ LibraryEnvironment LibEnv(Ctx, Handler, DL, *this);
+
+ LibFunc LF;
+ // Respect nobuiltin attributes on call site.
+ if (!CB.isNoBuiltin() && CurrentFrame->TLI.getLibFunc(*ResolvedCallee, LF)) {
+ if (auto Result = LibEnv.call(LF, CB)) {
+ return *Result;
}
- Handler.onUnrecognizedInstruction(CB);
+ // Library call requested an immediate halt. If this came from a program
+ // termination API (exit/abort/terminate), keep it distinct from failure.
+ if (hasProgramExit())
+ return AnyValue();
+
Status = false;
return AnyValue();
}
- void enterCall(CallBase &CB) {
- Function *Callee = CB.getCalledFunction();
- // TODO: handle parameter attributes (Attributes from known callee should be
- // applied if available).
- // TODO: handle byval/initializes
- auto &CalleeArgs = CurrentFrame->CalleeArgs;
- assert(CalleeArgs.empty() &&
- "Forgot to call returnFromCallee before entering a new call.");
- for (Value *Arg : CB.args())
- CalleeArgs.push_back(getValue(Arg));
+ Handler.onUnrecognizedInstruction(CB);
+ Status = false;
+ return AnyValue();
+}
- if (!Callee) {
- Value *CalledOperand = CB.getCalledOperand();
- if (isNoopInlineAsm(CalledOperand, CB.getType())) {
- CurrentFrame->ResolvedCallee = nullptr;
- returnFromCallee();
- return;
- }
+void InstExecutor::requestProgramExit(ProgramExitKind Kind, uint64_t ExitCode) {
+ ExitInfo = ProgramExitInfo{Kind, ExitCode};
+ Handler.onProgramExit(Kind, ExitCode);
+ // Program termination APIs always stop interpretation.
+ Status = false;
+}
- if (isa<InlineAsm>(CalledOperand)) {
- Handler.onUnrecognizedInstruction(CB);
- Status = false;
- return;
- }
+void InstExecutor::enterCall(CallBase &CB) {
+ Function *Callee = CB.getCalledFunction();
+ // TODO: handle parameter attributes (Attributes from known callee should
+ // be applied if available).
+ // TODO: handle byval/initializes
+ auto &CalleeArgs = CurrentFrame->CalleeArgs;
+ assert(CalleeArgs.empty() &&
+ "Forgot to call returnFromCallee before entering a new call.");
+ for (Value *Arg : CB.args())
+ CalleeArgs.push_back(getValue(Arg));
+
+ if (!Callee) {
+ Value *CalledOperand = CB.getCalledOperand();
+ if (isNoopInlineAsm(CalledOperand, CB.getType())) {
+ CurrentFrame->ResolvedCallee = nullptr;
+ returnFromCallee();
+ return;
+ }
- auto &CalleeVal = getValue(CalledOperand);
- if (CalleeVal.isPoison()) {
- reportImmediateUB("Indirect call through poison function pointer.");
- return;
- }
- Callee = Ctx.getTargetFunction(CalleeVal.asPointer());
- if (!Callee) {
- reportImmediateUB("Indirect call through invalid function pointer.");
- return;
- }
- if (Callee->getFunctionType() != CB.getFunctionType()) {
- reportImmediateUB("Indirect call through a function pointer with "
- "mismatched signature.");
- return;
- }
+ if (isa<InlineAsm>(CalledOperand)) {
+ Handler.onUnrecognizedInstruction(CB);
+ Status = false;
+ return;
}
- assert(Callee && "Expected a resolved callee function.");
- assert(
- Callee->getFunctionType() == CB.getFunctionType() &&
- "Expected the callee function type to match the call site signature.");
- CurrentFrame->ResolvedCallee = Callee;
- if (Callee->isIntrinsic()) {
- CurrentFrame->CalleeRetVal = callIntrinsic(CB);
- returnFromCallee();
+ auto &CalleeVal = getValue(CalledOperand);
+ if (CalleeVal.isPoison()) {
+ reportImmediateUB("Indirect call through poison function pointer.");
return;
- } else if (Callee->isDeclaration()) {
- CurrentFrame->CalleeRetVal = callLibFunc(CB, Callee);
- returnFromCallee();
+ }
+ Callee = Ctx.getTargetFunction(CalleeVal.asPointer());
+ if (!Callee) {
+ reportImmediateUB("Indirect call through invalid function pointer.");
+ return;
+ }
+ if (Callee->getFunctionType() != CB.getFunctionType()) {
+ reportImmediateUB("Indirect call through a function pointer with "
+ "mismatched signature.");
return;
- } else {
- uint32_t MaxStackDepth = Ctx.getMaxStackDepth();
- if (MaxStackDepth && CallStack.size() >= MaxStackDepth) {
- reportError("Maximum stack depth exceeded.");
- return;
- }
- assert(!Callee->empty() && "Expected a defined function.");
- // Suspend the current frame and push the callee frame onto the stack.
- ArrayRef<AnyValue> Args = CurrentFrame->CalleeArgs;
- AnyValue &RetVal = CurrentFrame->CalleeRetVal;
- CurrentFrame->State = FrameState::Pending;
- CallStack.emplace_back(*Callee, &CB, CurrentFrame, Args, RetVal,
- Ctx.getTLIImpl());
}
}
- void visitCallInst(CallInst &CI) { enterCall(CI); }
-
- void visitInvokeInst(InvokeInst &II) {
- // TODO: handle exceptions
- enterCall(II);
+ assert(Callee && "Expected a resolved callee function.");
+ assert(Callee->getFunctionType() == CB.getFunctionType() &&
+ "Expected the callee function type to match the call site "
+ "signature.");
+ CurrentFrame->ResolvedCallee = Callee;
+ if (Callee->isIntrinsic()) {
+ CurrentFrame->CalleeRetVal = callIntrinsic(CB);
+ returnFromCallee();
+ return;
+ } else if (Callee->isDeclaration()) {
+ CurrentFrame->CalleeRetVal = callLibFunc(CB, Callee);
+ returnFromCallee();
+ return;
+ } else {
+ uint32_t MaxStackDepth = Ctx.getMaxStackDepth();
+ if (MaxStackDepth && CallStack.size() >= MaxStackDepth) {
+ reportError("Maximum stack depth exceeded.");
+ return;
+ }
+ assert(!Callee->empty() && "Expected a defined function.");
+ // Suspend the current frame and push the callee frame onto the stack.
+ ArrayRef<AnyValue> Args = CurrentFrame->CalleeArgs;
+ AnyValue &RetVal = CurrentFrame->CalleeRetVal;
+ CurrentFrame->State = FrameState::Pending;
+ CallStack.emplace_back(*Callee, &CB, CurrentFrame, Args, RetVal,
+ Ctx.getTLIImpl());
}
+}
- void visitAdd(BinaryOperator &I) {
- visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) {
- return addNoWrap(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap());
- });
- }
+void InstExecutor::visitCallInst(CallInst &CI) { enterCall(CI); }
- void visitSub(BinaryOperator &I) {
- visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) {
- return subNoWrap(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap());
- });
- }
+void InstExecutor::visitInvokeInst(InvokeInst &II) {
+ // TODO: handle exceptions
+ enterCall(II);
+}
- void visitMul(BinaryOperator &I) {
- visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) {
- return mulNoWrap(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap());
- });
- }
+void InstExecutor::visitAdd(BinaryOperator &I) {
+ visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) {
+ return addNoWrap(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap());
+ });
+}
- void visitSDiv(BinaryOperator &I) {
- visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
- // Priority: Immediate UB > poison > normal value
- if (RHS.isPoison()) {
- reportImmediateUB("Division by zero (refine RHS to 0).");
- return AnyValue::poison();
- }
- const APInt &RHSVal = RHS.asInteger();
- if (RHSVal.isZero()) {
- reportImmediateUB("Division by zero.");
- return AnyValue::poison();
- }
- if (LHS.isPoison()) {
- if (RHSVal.isAllOnes())
- reportImmediateUB(
- "Signed division overflow (refine LHS to INT_MIN).");
- return AnyValue::poison();
- }
- const APInt &LHSVal = LHS.asInteger();
- if (LHSVal.isMinSignedValue() && RHSVal.isAllOnes()) {
- reportImmediateUB("Signed division overflow.");
- return AnyValue::poison();
- }
+void InstExecutor::visitSub(BinaryOperator &I) {
+ visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) {
+ return subNoWrap(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap());
+ });
+}
- if (I.isExact()) {
- APInt Q, R;
- APInt::sdivrem(LHSVal, RHSVal, Q, R);
- if (!R.isZero())
- return AnyValue::poison();
- return Q;
- } else {
- return LHSVal.sdiv(RHSVal);
- }
- });
- }
+void InstExecutor::visitMul(BinaryOperator &I) {
+ visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) {
+ return mulNoWrap(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap());
+ });
+}
- void visitSRem(BinaryOperator &I) {
- visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
- // Priority: Immediate UB > poison > normal value
- if (RHS.isPoison()) {
- reportImmediateUB("Division by zero (refine RHS to 0).");
- return AnyValue::poison();
- }
- const APInt &RHSVal = RHS.asInteger();
- if (RHSVal.isZero()) {
- reportImmediateUB("Division by zero.");
- return AnyValue::poison();
- }
- if (LHS.isPoison()) {
- if (RHSVal.isAllOnes())
- reportImmediateUB(
- "Signed division overflow (refine LHS to INT_MIN).");
- return AnyValue::poison();
- }
- const APInt &LHSVal = LHS.asInteger();
- if (LHSVal.isMinSignedValue() && RHSVal.isAllOnes()) {
- reportImmediateUB("Signed division overflow.");
+void InstExecutor::visitSDiv(BinaryOperator &I) {
+ visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
+ // Priority: Immediate UB > poison > normal value
+ if (RHS.isPoison()) {
+ reportImmediateUB("Division by zero (refine RHS to 0).");
+ return AnyValue::poison();
+ }
+ const APInt &RHSVal = RHS.asInteger();
+ if (RHSVal.isZero()) {
+ reportImmediateUB("Division by zero.");
+ return AnyValue::poison();
+ }
+ if (LHS.isPoison()) {
+ if (RHSVal.isAllOnes())
+ reportImmediateUB("Signed division overflow (refine LHS to INT_MIN).");
+ return AnyValue::poison();
+ }
+ const APInt &LHSVal = LHS.asInteger();
+ if (LHSVal.isMinSignedValue() && RHSVal.isAllOnes()) {
+ reportImmediateUB("Signed division overflow.");
+ return AnyValue::poison();
+ }
+
+ if (I.isExact()) {
+ APInt Q, R;
+ APInt::sdivrem(LHSVal, RHSVal, Q, R);
+ if (!R.isZero())
return AnyValue::poison();
- }
+ return Q;
+ } else {
+ return LHSVal.sdiv(RHSVal);
+ }
+ });
+}
- return LHSVal.srem(RHSVal);
- });
- }
+void InstExecutor::visitSRem(BinaryOperator &I) {
+ visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
+ // Priority: Immediate UB > poison > normal value
+ if (RHS.isPoison()) {
+ reportImmediateUB("Division by zero (refine RHS to 0).");
+ return AnyValue::poison();
+ }
+ const APInt &RHSVal = RHS.asInteger();
+ if (RHSVal.isZero()) {
+ reportImmediateUB("Division by zero.");
+ return AnyValue::poison();
+ }
+ if (LHS.isPoison()) {
+ if (RHSVal.isAllOnes())
+ reportImmediateUB("Signed division overflow (refine LHS to INT_MIN).");
+ return AnyValue::poison();
+ }
+ const APInt &LHSVal = LHS.asInteger();
+ if (LHSVal.isMinSignedValue() && RHSVal.isAllOnes()) {
+ reportImmediateUB("Signed division overflow.");
+ return AnyValue::poison();
+ }
- void visitUDiv(BinaryOperator &I) {
- visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
- // Priority: Immediate UB > poison > normal value
- if (RHS.isPoison()) {
- reportImmediateUB("Division by zero (refine RHS to 0).");
- return AnyValue::poison();
- }
- const APInt &RHSVal = RHS.asInteger();
- if (RHSVal.isZero()) {
- reportImmediateUB("Division by zero.");
- return AnyValue::poison();
- }
- if (LHS.isPoison())
- return AnyValue::poison();
- const APInt &LHSVal = LHS.asInteger();
-
- if (I.isExact()) {
- APInt Q, R;
- APInt::udivrem(LHSVal, RHSVal, Q, R);
- if (!R.isZero())
- return AnyValue::poison();
- return Q;
- } else {
- return LHSVal.udiv(RHSVal);
- }
- });
- }
+ return LHSVal.srem(RHSVal);
+ });
+}
- void visitURem(BinaryOperator &I) {
- visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
- // Priority: Immediate UB > poison > normal value
- if (RHS.isPoison()) {
- reportImmediateUB("Division by zero (refine RHS to 0).");
- return AnyValue::poison();
- }
- const APInt &RHSVal = RHS.asInteger();
- if (RHSVal.isZero()) {
- reportImmediateUB("Division by zero.");
- return AnyValue::poison();
- }
- if (LHS.isPoison())
- return AnyValue::poison();
- const APInt &LHSVal = LHS.asInteger();
- return LHSVal.urem(RHSVal);
- });
- }
+void InstExecutor::visitUDiv(BinaryOperator &I) {
+ visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
+ // Priority: Immediate UB > poison > normal value
+ if (RHS.isPoison()) {
+ reportImmediateUB("Division by zero (refine RHS to 0).");
+ return AnyValue::poison();
+ }
+ const APInt &RHSVal = RHS.asInteger();
+ if (RHSVal.isZero()) {
+ reportImmediateUB("Division by zero.");
+ return AnyValue::poison();
+ }
+ if (LHS.isPoison())
+ return AnyValue::poison();
+ const APInt &LHSVal = LHS.asInteger();
- void visitTruncInst(TruncInst &Trunc) {
- visitIntUnOp(Trunc, [&](const APInt &Operand) -> AnyValue {
- unsigned DestBW = Trunc.getType()->getScalarSizeInBits();
- if (Trunc.hasNoSignedWrap() && Operand.getSignificantBits() > DestBW)
- return AnyValue::poison();
- if (Trunc.hasNoUnsignedWrap() && Operand.getActiveBits() > DestBW)
+ if (I.isExact()) {
+ APInt Q, R;
+ APInt::udivrem(LHSVal, RHSVal, Q, R);
+ if (!R.isZero())
return AnyValue::poison();
- return Operand.trunc(DestBW);
- });
- }
+ return Q;
+ } else {
+ return LHSVal.udiv(RHSVal);
+ }
+ });
+}
- void visitZExtInst(ZExtInst &ZExt) {
- visitIntUnOp(ZExt, [&](const APInt &Operand) -> AnyValue {
- uint32_t DestBW = ZExt.getDestTy()->getScalarSizeInBits();
- if (ZExt.hasNonNeg() && Operand.isNegative())
- return AnyValue::poison();
- return Operand.zext(DestBW);
- });
- }
+void InstExecutor::visitURem(BinaryOperator &I) {
+ visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
+ // Priority: Immediate UB > poison > normal value
+ if (RHS.isPoison()) {
+ reportImmediateUB("Division by zero (refine RHS to 0).");
+ return AnyValue::poison();
+ }
+ const APInt &RHSVal = RHS.asInteger();
+ if (RHSVal.isZero()) {
+ reportImmediateUB("Division by zero.");
+ return AnyValue::poison();
+ }
+ if (LHS.isPoison())
+ return AnyValue::poison();
+ const APInt &LHSVal = LHS.asInteger();
+ return LHSVal.urem(RHSVal);
+ });
+}
- void visitSExtInst(SExtInst &SExt) {
- visitIntUnOp(SExt, [&](const APInt &Operand) -> AnyValue {
- uint32_t DestBW = SExt.getDestTy()->getScalarSizeInBits();
- return Operand.sext(DestBW);
- });
- }
+void InstExecutor::visitTruncInst(TruncInst &Trunc) {
+ visitIntUnOp(Trunc, [&](const APInt &Operand) -> AnyValue {
+ unsigned DestBW = Trunc.getType()->getScalarSizeInBits();
+ if (Trunc.hasNoSignedWrap() && Operand.getSignificantBits() > DestBW)
+ return AnyValue::poison();
+ if (Trunc.hasNoUnsignedWrap() && Operand.getActiveBits() > DestBW)
+ return AnyValue::poison();
+ return Operand.trunc(DestBW);
+ });
+}
- void visitAnd(BinaryOperator &I) {
- visitIntBinOp(I, [](const APInt &LHS, const APInt &RHS) -> AnyValue {
- return LHS & RHS;
- });
- }
+void InstExecutor::visitZExtInst(ZExtInst &ZExt) {
+ visitIntUnOp(ZExt, [&](const APInt &Operand) -> AnyValue {
+ uint32_t DestBW = ZExt.getDestTy()->getScalarSizeInBits();
+ if (ZExt.hasNonNeg() && Operand.isNegative())
+ return AnyValue::poison();
+ return Operand.zext(DestBW);
+ });
+}
- void visitXor(BinaryOperator &I) {
- visitIntBinOp(I, [](const APInt &LHS, const APInt &RHS) -> AnyValue {
- return LHS ^ RHS;
- });
- }
+void InstExecutor::visitSExtInst(SExtInst &SExt) {
+ visitIntUnOp(SExt, [&](const APInt &Operand) -> AnyValue {
+ uint32_t DestBW = SExt.getDestTy()->getScalarSizeInBits();
+ return Operand.sext(DestBW);
+ });
+}
- void visitOr(BinaryOperator &I) {
- visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
- if (cast<PossiblyDisjointInst>(I).isDisjoint() && LHS.intersects(RHS))
- return AnyValue::poison();
- return LHS | RHS;
- });
- }
+void InstExecutor::visitAnd(BinaryOperator &I) {
+ visitIntBinOp(I, [](const APInt &LHS, const APInt &RHS) -> AnyValue {
+ return LHS & RHS;
+ });
+}
- void visitShl(BinaryOperator &I) {
- visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
- if (RHS.uge(LHS.getBitWidth()))
- return AnyValue::poison();
- if (I.hasNoSignedWrap() && RHS.uge(LHS.getNumSignBits()))
- return AnyValue::poison();
- if (I.hasNoUnsignedWrap() && RHS.ugt(LHS.countl_zero()))
- return AnyValue::poison();
- return LHS.shl(RHS);
- });
- }
+void InstExecutor::visitXor(BinaryOperator &I) {
+ visitIntBinOp(I, [](const APInt &LHS, const APInt &RHS) -> AnyValue {
+ return LHS ^ RHS;
+ });
+}
- void visitLShr(BinaryOperator &I) {
- visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
- if (RHS.uge(cast<PossiblyExactOperator>(I).isExact()
- ? LHS.countr_zero() + 1
- : LHS.getBitWidth()))
- return AnyValue::poison();
- return LHS.lshr(RHS);
- });
- }
+void InstExecutor::visitOr(BinaryOperator &I) {
+ visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
+ if (cast<PossiblyDisjointInst>(I).isDisjoint() && LHS.intersects(RHS))
+ return AnyValue::poison();
+ return LHS | RHS;
+ });
+}
- void visitAShr(BinaryOperator &I) {
- visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
- if (RHS.uge(cast<PossiblyExactOperator>(I).isExact()
- ? LHS.countr_zero() + 1
- : LHS.getBitWidth()))
- return AnyValue::poison();
- return LHS.ashr(RHS);
- });
- }
+void InstExecutor::visitShl(BinaryOperator &I) {
+ visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
+ if (RHS.uge(LHS.getBitWidth()))
+ return AnyValue::poison();
+ if (I.hasNoSignedWrap() && RHS.uge(LHS.getNumSignBits()))
+ return AnyValue::poison();
+ if (I.hasNoUnsignedWrap() && RHS.ugt(LHS.countl_zero()))
+ return AnyValue::poison();
+ return LHS.shl(RHS);
+ });
+}
- void visitICmpInst(ICmpInst &I) {
- visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
- if (LHS.isPoison() || RHS.isPoison())
- return AnyValue::poison();
- // TODO: handle pointer comparison.
- const APInt &LHSVal = LHS.asInteger();
- const APInt &RHSVal = RHS.asInteger();
- if (I.hasSameSign() && LHSVal.isNonNegative() != RHSVal.isNonNegative())
- return AnyValue::poison();
- return AnyValue::boolean(
- ICmpInst::compare(LHSVal, RHSVal, I.getPredicate()));
- });
- }
+void InstExecutor::visitLShr(BinaryOperator &I) {
+ visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
+ if (RHS.uge(cast<PossiblyExactOperator>(I).isExact() ? LHS.countr_zero() + 1
+ : LHS.getBitWidth()))
+ return AnyValue::poison();
+ return LHS.lshr(RHS);
+ });
+}
- void visitSelect(SelectInst &SI) {
- // TODO: handle fast-math flags.
- if (SI.getCondition()->getType()->isIntegerTy(1)) {
- switch (getValue(SI.getCondition()).asBoolean()) {
- case BooleanKind::True:
- setResult(SI, getValue(SI.getTrueValue()));
- return;
- case BooleanKind::False:
- setResult(SI, getValue(SI.getFalseValue()));
- return;
- case BooleanKind::Poison:
- setResult(SI, AnyValue::getPoisonValue(Ctx, SI.getType()));
- return;
- }
+void InstExecutor::visitAShr(BinaryOperator &I) {
+ visitIntBinOp(I, [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
+ if (RHS.uge(cast<PossiblyExactOperator>(I).isExact() ? LHS.countr_zero() + 1
+ : LHS.getBitWidth()))
+ return AnyValue::poison();
+ return LHS.ashr(RHS);
+ });
+}
+
+void InstExecutor::visitICmpInst(ICmpInst &I) {
+ visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
+ if (LHS.isPoison() || RHS.isPoison())
+ return AnyValue::poison();
+ // TODO: handle pointer comparison.
+ const APInt &LHSVal = LHS.asInteger();
+ const APInt &RHSVal = RHS.asInteger();
+ if (I.hasSameSign() && LHSVal.isNonNegative() != RHSVal.isNonNegative())
+ return AnyValue::poison();
+ return AnyValue::boolean(
+ ICmpInst::compare(LHSVal, RHSVal, I.getPredicate()));
+ });
+}
+
+void InstExecutor::visitSelect(SelectInst &SI) {
+ // TODO: handle fast-math flags.
+ if (SI.getCondition()->getType()->isIntegerTy(1)) {
+ switch (getValue(SI.getCondition()).asBoolean()) {
+ case BooleanKind::True:
+ setResult(SI, getValue(SI.getTrueValue()));
+ return;
+ case BooleanKind::False:
+ setResult(SI, getValue(SI.getFalseValue()));
+ return;
+ case BooleanKind::Poison:
+ setResult(SI, AnyValue::getPoisonValue(Ctx, SI.getType()));
+ return;
}
+ }
- auto &Cond = getValue(SI.getCondition()).asAggregate();
- auto &TV = getValue(SI.getTrueValue()).asAggregate();
- auto &FV = getValue(SI.getFalseValue()).asAggregate();
- std::vector<AnyValue> Res;
- size_t Len = Cond.size();
- Res.reserve(Len);
- for (uint32_t I = 0; I != Len; ++I) {
- switch (Cond[I].asBoolean()) {
- case BooleanKind::True:
- Res.push_back(TV[I]);
- break;
- case BooleanKind::False:
- Res.push_back(FV[I]);
- break;
- case BooleanKind::Poison:
- Res.push_back(AnyValue::poison());
- break;
- }
+ auto &Cond = getValue(SI.getCondition()).asAggregate();
+ auto &TV = getValue(SI.getTrueValue()).asAggregate();
+ auto &FV = getValue(SI.getFalseValue()).asAggregate();
+ std::vector<AnyValue> Res;
+ size_t Len = Cond.size();
+ Res.reserve(Len);
+ for (uint32_t I = 0; I != Len; ++I) {
+ switch (Cond[I].asBoolean()) {
+ case BooleanKind::True:
+ Res.push_back(TV[I]);
+ break;
+ case BooleanKind::False:
+ Res.push_back(FV[I]);
+ break;
+ case BooleanKind::Poison:
+ Res.push_back(AnyValue::poison());
+ break;
}
- setResult(SI, std::move(Res));
}
+ setResult(SI, std::move(Res));
+}
- void visitAllocaInst(AllocaInst &AI) {
- uint64_t AllocSize = Ctx.getEffectiveTypeAllocSize(AI.getAllocatedType());
- if (AI.isArrayAllocation()) {
- auto &Size = getValue(AI.getArraySize());
- if (Size.isPoison()) {
- reportImmediateUB("Alloca with poison array size.");
- return;
- }
- if (Size.asInteger().getActiveBits() > 64) {
- reportImmediateUB(
- "Alloca with large array size that overflows uint64_t.");
- return;
- }
- bool Overflowed = false;
- AllocSize = SaturatingMultiply(AllocSize, Size.asInteger().getZExtValue(),
- &Overflowed);
- if (Overflowed) {
- reportImmediateUB(
- "Alloca with allocation size that overflows uint64_t.");
- return;
- }
+void InstExecutor::visitAllocaInst(AllocaInst &AI) {
+ uint64_t AllocSize = Ctx.getEffectiveTypeAllocSize(AI.getAllocatedType());
+ if (AI.isArrayAllocation()) {
+ auto &Size = getValue(AI.getArraySize());
+ if (Size.isPoison()) {
+ reportImmediateUB("Alloca with poison array size.");
+ return;
}
- // If it is used by llvm.lifetime.start, it should be initially dead.
- bool IsInitiallyDead = any_of(AI.users(), [](User *U) {
- return match(U, m_Intrinsic<Intrinsic::lifetime_start>());
- });
- auto Obj = Ctx.allocate(AllocSize, AI.getPointerAlignment(DL).value(),
- AI.getName(), AI.getAddressSpace(),
- IsInitiallyDead ? MemInitKind::Poisoned
- : MemInitKind::Uninitialized);
- if (!Obj) {
- reportError("Insufficient stack space.");
+ if (Size.asInteger().getActiveBits() > 64) {
+ reportImmediateUB(
+ "Alloca with large array size that overflows uint64_t.");
return;
}
- CurrentFrame->Allocas.push_back(Obj);
- setResult(AI, Ctx.deriveFromMemoryObject(Obj));
+ bool Overflowed = false;
+ AllocSize = SaturatingMultiply(AllocSize, Size.asInteger().getZExtValue(),
+ &Overflowed);
+ if (Overflowed) {
+ reportImmediateUB("Alloca with allocation size that overflows uint64_t.");
+ return;
+ }
+ }
+ // If it is used by llvm.lifetime.start, it should be initially dead.
+ bool IsInitiallyDead = any_of(AI.users(), [](User *U) {
+ return match(U, m_Intrinsic<Intrinsic::lifetime_start>());
+ });
+ auto Obj = Ctx.allocate(AllocSize, AI.getPointerAlignment(DL).value(),
+ AI.getName(), AI.getAddressSpace(),
+ IsInitiallyDead ? MemInitKind::Poisoned
+ : MemInitKind::Uninitialized);
+ if (!Obj) {
+ reportError("Insufficient stack space.");
+ return;
}
+ CurrentFrame->Allocas.push_back(Obj);
+ setResult(AI, Ctx.deriveFromMemoryObject(Obj));
+}
- void visitGetElementPtrInst(GetElementPtrInst &GEP) {
- uint32_t IndexBitWidth =
- DL.getIndexSizeInBits(GEP.getType()->getPointerAddressSpace());
- GEPNoWrapFlags Flags = GEP.getNoWrapFlags();
- AnyValue Res = getValue(GEP.getPointerOperand());
- AnyValue AccumulatedOffset = APInt(IndexBitWidth, 0);
- if (Res.isAggregate())
- AccumulatedOffset =
- AnyValue::getVectorSplat(AccumulatedOffset, Res.asAggregate().size());
- auto ApplyScaledOffset = [&](const AnyValue &Index, const APInt &Scale) {
- if (Index.isAggregate() && !Res.isAggregate()) {
- Res = AnyValue::getVectorSplat(Res, Index.asAggregate().size());
- AccumulatedOffset = AnyValue::getVectorSplat(
- AccumulatedOffset, Index.asAggregate().size());
- }
- if (Index.isAggregate() && Res.isAggregate()) {
- for (auto &&[ResElem, IndexElem, OffsetElem] :
- zip(Res.asAggregate(), Index.asAggregate(),
- AccumulatedOffset.asAggregate()))
- ResElem = computeScaledPtrAdd(
- ResElem, canonicalizeIndex(IndexElem, IndexBitWidth, Flags),
- Scale, Flags, OffsetElem);
+void InstExecutor::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ uint32_t IndexBitWidth =
+ DL.getIndexSizeInBits(GEP.getType()->getPointerAddressSpace());
+ GEPNoWrapFlags Flags = GEP.getNoWrapFlags();
+ AnyValue Res = getValue(GEP.getPointerOperand());
+ AnyValue AccumulatedOffset = APInt(IndexBitWidth, 0);
+ if (Res.isAggregate())
+ AccumulatedOffset =
+ AnyValue::getVectorSplat(AccumulatedOffset, Res.asAggregate().size());
+ auto ApplyScaledOffset = [&](const AnyValue &Index, const APInt &Scale) {
+ if (Index.isAggregate() && !Res.isAggregate()) {
+ Res = AnyValue::getVectorSplat(Res, Index.asAggregate().size());
+ AccumulatedOffset = AnyValue::getVectorSplat(AccumulatedOffset,
+ Index.asAggregate().size());
+ }
+ if (Index.isAggregate() && Res.isAggregate()) {
+ for (auto &&[ResElem, IndexElem, OffsetElem] :
+ zip(Res.asAggregate(), Index.asAggregate(),
+ AccumulatedOffset.asAggregate()))
+ ResElem = computeScaledPtrAdd(
+ ResElem, canonicalizeIndex(IndexElem, IndexBitWidth, Flags), Scale,
+ Flags, OffsetElem);
+ } else {
+ AnyValue CanonicalIndex = canonicalizeIndex(Index, IndexBitWidth, Flags);
+ if (Res.isAggregate()) {
+ for (auto &&[ResElem, OffsetElem] :
+ zip(Res.asAggregate(), AccumulatedOffset.asAggregate()))
+ ResElem = computeScaledPtrAdd(ResElem, CanonicalIndex, Scale, Flags,
+ OffsetElem);
} else {
- AnyValue CanonicalIndex =
- canonicalizeIndex(Index, IndexBitWidth, Flags);
- if (Res.isAggregate()) {
- for (auto &&[ResElem, OffsetElem] :
- zip(Res.asAggregate(), AccumulatedOffset.asAggregate()))
- ResElem = computeScaledPtrAdd(ResElem, CanonicalIndex, Scale, Flags,
- OffsetElem);
- } else {
- Res = computeScaledPtrAdd(Res, CanonicalIndex, Scale, Flags,
- AccumulatedOffset);
- }
+ Res = computeScaledPtrAdd(Res, CanonicalIndex, Scale, Flags,
+ AccumulatedOffset);
}
- };
+ }
+ };
- for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
- GTI != GTE; ++GTI) {
- Value *V = GTI.getOperand();
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ Value *V = GTI.getOperand();
- // Fast path for zero offsets.
- if (auto *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->isZero())
- continue;
- }
- if (isa<ConstantAggregateZero>(V))
+ // Fast path for zero offsets.
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->isZero())
continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = GTI.getStructTypeOrNull()) {
- unsigned ElementIdx = cast<ConstantInt>(V)->getZExtValue();
- const StructLayout *SL = DL.getStructLayout(STy);
- // Element offset is in bytes.
- ApplyScaledOffset(
- APInt(IndexBitWidth, SL->getElementOffset(ElementIdx)),
- APInt(IndexBitWidth, 1));
- continue;
- }
-
- // Truncate if type size exceeds index space.
- // TODO: Should be documented in LangRef: GEPs with nowrap flags should
- // return poison when the type size exceeds index space.
- TypeSize Offset = GTI.getSequentialElementStride(DL);
- APInt Scale(IndexBitWidth, Ctx.getEffectiveTypeSize(Offset),
- /*isSigned=*/false, /*implicitTrunc=*/true);
- if (!Scale.isZero())
- ApplyScaledOffset(getValue(V), Scale);
+ }
+ if (isa<ConstantAggregateZero>(V))
+ continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
+ unsigned ElementIdx = cast<ConstantInt>(V)->getZExtValue();
+ const StructLayout *SL = DL.getStructLayout(STy);
+ // Element offset is in bytes.
+ ApplyScaledOffset(APInt(IndexBitWidth, SL->getElementOffset(ElementIdx)),
+ APInt(IndexBitWidth, 1));
+ continue;
}
- setResult(GEP, std::move(Res));
+ // Truncate if type size exceeds index space.
+ // TODO: Should be documented in LangRef: GEPs with nowrap flags should
+ // return poison when the type size exceeds index space.
+ TypeSize Offset = GTI.getSequentialElementStride(DL);
+ APInt Scale(IndexBitWidth, Ctx.getEffectiveTypeSize(Offset),
+ /*isSigned=*/false, /*implicitTrunc=*/true);
+ if (!Scale.isZero())
+ ApplyScaledOffset(getValue(V), Scale);
}
- void visitIntToPtr(IntToPtrInst &I) {
- return visitUnOp(I, [&](const AnyValue &V) -> AnyValue {
- if (V.isPoison())
- return AnyValue::poison();
- // TODO: expose provenance
- // TODO: check metadata
- return Pointer(V.asInteger().zextOrTrunc(
- DL.getPointerSizeInBits(I.getType()->getPointerAddressSpace())));
- });
- }
+ setResult(GEP, std::move(Res));
+}
- void visitLoadInst(LoadInst &LI) {
- auto RetVal =
- load(getValue(LI.getPointerOperand()), LI.getAlign(), LI.getType());
- // TODO: track volatile loads
- // TODO: handle metadata
- setResult(LI, std::move(RetVal));
- }
+void InstExecutor::visitIntToPtr(IntToPtrInst &I) {
+ return visitUnOp(I, [&](const AnyValue &V) -> AnyValue {
+ if (V.isPoison())
+ return AnyValue::poison();
+ // TODO: expose provenance
+ // TODO: check metadata
+ return Pointer(V.asInteger().zextOrTrunc(
+ DL.getPointerSizeInBits(I.getType()->getPointerAddressSpace())));
+ });
+}
- void visitStoreInst(StoreInst &SI) {
- auto &Ptr = getValue(SI.getPointerOperand());
- auto &Val = getValue(SI.getValueOperand());
- // TODO: track volatile stores
- // TODO: handle metadata
- store(Ptr, SI.getAlign(), Val, SI.getValueOperand()->getType());
- if (Status)
- Status &= Handler.onInstructionExecuted(SI, AnyValue());
- }
+void InstExecutor::visitLoadInst(LoadInst &LI) {
+ auto RetVal =
+ load(getValue(LI.getPointerOperand()), LI.getAlign(), LI.getType());
+ // TODO: track volatile loads
+ // TODO: handle metadata
+ setResult(LI, std::move(RetVal));
+}
- void visitInstruction(Instruction &I) {
- Handler.onUnrecognizedInstruction(I);
- Status = false;
- }
+void InstExecutor::visitStoreInst(StoreInst &SI) {
+ auto &Ptr = getValue(SI.getPointerOperand());
+ auto &Val = getValue(SI.getValueOperand());
+ // TODO: track volatile stores
+ // TODO: handle metadata
+ store(Ptr, SI.getAlign(), Val, SI.getValueOperand()->getType());
+ if (Status)
+ Status &= Handler.onInstructionExecuted(SI, AnyValue());
+}
- void visitExtractValueInst(ExtractValueInst &EVI) {
- auto &Res = getValue(EVI.getAggregateOperand());
- const AnyValue *Pos = &Res;
- for (unsigned Idx : EVI.indices())
- Pos = &Pos->asAggregate()[Idx];
- setResult(EVI, *Pos);
- }
+void InstExecutor::visitInstruction(Instruction &I) {
+ Handler.onUnrecognizedInstruction(I);
+ Status = false;
+}
- void visitInsertValueInst(InsertValueInst &IVI) {
- AnyValue Res = getValue(IVI.getAggregateOperand());
- AnyValue *Pos = &Res;
- for (unsigned Idx : IVI.indices())
- Pos = &Pos->asAggregate()[Idx];
- *Pos = getValue(IVI.getInsertedValueOperand());
- setResult(IVI, std::move(Res));
- }
+void InstExecutor::visitExtractValueInst(ExtractValueInst &EVI) {
+ auto &Res = getValue(EVI.getAggregateOperand());
+ const AnyValue *Pos = &Res;
+ for (unsigned Idx : EVI.indices())
+ Pos = &Pos->asAggregate()[Idx];
+ setResult(EVI, *Pos);
+}
- void visitInsertElementInst(InsertElementInst &IEI) {
- auto Res = getValue(IEI.getOperand(0));
- auto &ResVec = Res.asAggregate();
- auto &Idx = getValue(IEI.getOperand(2));
- if (Idx.isPoison() || Idx.asInteger().uge(ResVec.size())) {
- setResult(IEI, AnyValue::getPoisonValue(Ctx, IEI.getType()));
- return;
- }
- ResVec[Idx.asInteger().getZExtValue()] = getValue(IEI.getOperand(1));
- setResult(IEI, std::move(Res));
+void InstExecutor::visitInsertValueInst(InsertValueInst &IVI) {
+ AnyValue Res = getValue(IVI.getAggregateOperand());
+ AnyValue *Pos = &Res;
+ for (unsigned Idx : IVI.indices())
+ Pos = &Pos->asAggregate()[Idx];
+ *Pos = getValue(IVI.getInsertedValueOperand());
+ setResult(IVI, std::move(Res));
+}
+
+void InstExecutor::visitInsertElementInst(InsertElementInst &IEI) {
+ auto Res = getValue(IEI.getOperand(0));
+ auto &ResVec = Res.asAggregate();
+ auto &Idx = getValue(IEI.getOperand(2));
+ if (Idx.isPoison() || Idx.asInteger().uge(ResVec.size())) {
+ setResult(IEI, AnyValue::getPoisonValue(Ctx, IEI.getType()));
+ return;
}
+ ResVec[Idx.asInteger().getZExtValue()] = getValue(IEI.getOperand(1));
+ setResult(IEI, std::move(Res));
+}
- void visitExtractElementInst(ExtractElementInst &EEI) {
- auto &SrcVec = getValue(EEI.getOperand(0)).asAggregate();
- auto &Idx = getValue(EEI.getOperand(1));
- if (Idx.isPoison() || Idx.asInteger().uge(SrcVec.size())) {
- setResult(EEI, AnyValue::getPoisonValue(Ctx, EEI.getType()));
- return;
- }
- setResult(EEI, SrcVec[Idx.asInteger().getZExtValue()]);
+void InstExecutor::visitExtractElementInst(ExtractElementInst &EEI) {
+ auto &SrcVec = getValue(EEI.getOperand(0)).asAggregate();
+ auto &Idx = getValue(EEI.getOperand(1));
+ if (Idx.isPoison() || Idx.asInteger().uge(SrcVec.size())) {
+ setResult(EEI, AnyValue::getPoisonValue(Ctx, EEI.getType()));
+ return;
}
+ setResult(EEI, SrcVec[Idx.asInteger().getZExtValue()]);
+}
- void visitShuffleVectorInst(ShuffleVectorInst &SVI) {
- auto &LHSVec = getValue(SVI.getOperand(0)).asAggregate();
- auto &RHSVec = getValue(SVI.getOperand(1)).asAggregate();
- uint32_t Size = cast<VectorType>(SVI.getOperand(0)->getType())
- ->getElementCount()
- .getKnownMinValue();
- std::vector<AnyValue> Res;
- uint32_t DstLen = Ctx.getEVL(SVI.getType()->getElementCount());
- Res.reserve(DstLen);
- uint32_t Stride = SVI.getShuffleMask().size();
- // For scalable vectors, we need to repeat the shuffle mask until we fill
- // the destination vector.
- for (uint32_t Off = 0; Off != DstLen; Off += Stride) {
- for (int Idx : SVI.getShuffleMask()) {
- if (Idx == PoisonMaskElem)
- Res.push_back(AnyValue::poison());
- else if (Idx < static_cast<int>(Size))
- Res.push_back(LHSVec[Idx]);
- else
- Res.push_back(RHSVec[Idx - Size]);
- }
+void InstExecutor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ auto &LHSVec = getValue(SVI.getOperand(0)).asAggregate();
+ auto &RHSVec = getValue(SVI.getOperand(1)).asAggregate();
+ uint32_t Size = cast<VectorType>(SVI.getOperand(0)->getType())
+ ->getElementCount()
+ .getKnownMinValue();
+ std::vector<AnyValue> Res;
+ uint32_t DstLen = Ctx.getEVL(SVI.getType()->getElementCount());
+ Res.reserve(DstLen);
+ uint32_t Stride = SVI.getShuffleMask().size();
+ // For scalable vectors, we need to repeat the shuffle mask until we fill
+ // the destination vector.
+ for (uint32_t Off = 0; Off != DstLen; Off += Stride) {
+ for (int Idx : SVI.getShuffleMask()) {
+ if (Idx == PoisonMaskElem)
+ Res.push_back(AnyValue::poison());
+ else if (Idx < static_cast<int>(Size))
+ Res.push_back(LHSVec[Idx]);
+ else
+ Res.push_back(RHSVec[Idx - Size]);
}
- setResult(SVI, std::move(Res));
}
+ setResult(SVI, std::move(Res));
+}
- /// This function implements the main interpreter loop.
- /// It handles function calls in a non-recursive manner to avoid stack
- /// overflows.
- bool runMainLoop() {
- uint32_t MaxSteps = Ctx.getMaxSteps();
- uint32_t Steps = 0;
- while (Status && !CallStack.empty()) {
- Frame &Top = CallStack.back();
- CurrentFrame = &Top;
- if (Top.State == FrameState::Entry) {
- Handler.onFunctionEntry(Top.Func, Top.Args, Top.CallSite);
- } else {
- assert(Top.State == FrameState::Pending &&
- "Expected to return from a callee.");
- returnFromCallee();
- }
+/// This function implements the main interpreter loop.
+/// It handles function calls in a non-recursive manner to avoid stack
+/// overflows.
+ExecutionStatus InstExecutor::runMainLoop() {
+ uint32_t MaxSteps = Ctx.getMaxSteps();
+ uint32_t Steps = 0;
+ while (Status && !CallStack.empty()) {
+ Frame &Top = CallStack.back();
+ CurrentFrame = &Top;
+ if (Top.State == FrameState::Entry) {
+ Handler.onFunctionEntry(Top.Func, Top.Args, Top.CallSite);
+ } else {
+ assert(Top.State == FrameState::Pending &&
+ "Expected to return from a callee.");
+ returnFromCallee();
+ }
- Top.State = FrameState::Running;
- // Interpreter loop inside a function
- while (Status) {
- assert(Top.State == FrameState::Running &&
- "Expected to be in running state.");
- if (MaxSteps != 0 && Steps >= MaxSteps) {
- reportError("Exceeded maximum number of execution steps.");
- break;
- }
- ++Steps;
-
- Instruction &I = *Top.PC;
- visit(&I);
- if (!Status)
- break;
-
- // A function call or return has occurred.
- // We need to exit the inner loop and switch to a different frame.
- if (Top.State != FrameState::Running)
- break;
-
- // Otherwise, move to the next instruction if it is not a terminator.
- // For terminators, the PC is updated in the visit* method.
- if (!I.isTerminator())
- ++Top.PC;
+ Top.State = FrameState::Running;
+ // Interpreter loop inside a function
+ while (Status) {
+ assert(Top.State == FrameState::Running &&
+ "Expected to be in running state.");
+ if (MaxSteps != 0 && Steps >= MaxSteps) {
+ reportError("Exceeded maximum number of execution steps.");
+ break;
}
+ ++Steps;
+ Instruction &I = *Top.PC;
+ visit(&I);
if (!Status)
break;
- if (Top.State == FrameState::Exit) {
- assert((Top.Func.getReturnType()->isVoidTy() || !Top.RetVal.isNone()) &&
- "Expected return value to be set on function exit.");
- Handler.onFunctionExit(Top.Func, Top.RetVal);
- // Free stack objects allocated in this frame.
- for (auto &Obj : Top.Allocas)
- Ctx.free(Obj->getAddress());
- CallStack.pop_back();
- } else {
- assert(Top.State == FrameState::Pending &&
- "Expected to enter a callee.");
- }
+ // A function call or return has occurred.
+ // We need to exit the inner loop and switch to a different frame.
+ if (Top.State != FrameState::Running)
+ break;
+
+ // Otherwise, move to the next instruction if it is not a terminator.
+ // For terminators, the PC is updated in the visit* method.
+ if (!I.isTerminator())
+ ++Top.PC;
+ }
+
+ if (!Status)
+ break;
+
+ if (Top.State == FrameState::Exit) {
+ assert((Top.Func.getReturnType()->isVoidTy() || !Top.RetVal.isNone()) &&
+ "Expected return value to be set on function exit.");
+ Handler.onFunctionExit(Top.Func, Top.RetVal);
+ // Free stack objects allocated in this frame.
+ for (auto &Obj : Top.Allocas)
+ Ctx.free(Obj->getAddress());
+ CallStack.pop_back();
+ } else {
+ assert(Top.State == FrameState::Pending && "Expected to enter a callee.");
}
- return Status;
}
-};
+ if (hasProgramExit())
+ return ExecutionStatus::ProgramExited;
+ return Status ? ExecutionStatus::Completed : ExecutionStatus::Failed;
+}
-bool Context::runFunction(Function &F, ArrayRef<AnyValue> Args,
- AnyValue &RetVal, EventHandler &Handler) {
+ExecutionStatus Context::runFunction(Function &F, ArrayRef<AnyValue> Args,
+ AnyValue &RetVal, EventHandler &Handler,
+ std::optional<ProgramExitInfo> *ExitInfo) {
InstExecutor Executor(*this, Handler, F, Args, RetVal);
- return Executor.runMainLoop();
+ ExecutionStatus Status = Executor.runMainLoop();
+ if (ExitInfo)
+ *ExitInfo = Executor.getProgramExitInfo();
+ return Status;
}
} // namespace llvm::ubi
+
diff --git a/llvm/tools/llubi/lib/Interpreter.h b/llvm/tools/llubi/lib/Interpreter.h
new file mode 100644
index 0000000000000..64c3fc06721ce
--- /dev/null
+++ b/llvm/tools/llubi/lib/Interpreter.h
@@ -0,0 +1,211 @@
+//===--- Interpreter.h - Interpreter Loop for llubi -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLUBI_INTERPRETER_H
+#define LLVM_TOOLS_LLUBI_INTERPRETER_H
+
+#include "Context.h"
+#include "Value.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/InstVisitor.h"
+
+namespace llvm::ubi {
+
+enum class FrameState {
+ // It is about to enter the function.
+ // Valid transition:
+ // -> Running
+ Entry,
+ // It is executing instructions inside the function.
+ // Valid transitions:
+ // -> Pending (on call)
+ // -> Exit (on return)
+ Running,
+ // It is about to enter a callee or handle return value from the callee.
+ // Valid transitions:
+ // -> Running (after returning from callee)
+ Pending,
+ // It is about to return the control to the caller.
+ Exit,
+};
+
+/// Context for a function call.
+/// This struct maintains the state during the execution of a function,
+/// including the control flow, values of executed instructions, and stack
+/// objects.
+struct Frame {
+ Function &Func;
+ Frame *LastFrame;
+ CallBase *CallSite;
+ ArrayRef<AnyValue> Args;
+ AnyValue &RetVal;
+
+ TargetLibraryInfo TLI;
+ BasicBlock *BB;
+ BasicBlock::iterator PC;
+ FrameState State = FrameState::Entry;
+ // Stack objects allocated in this frame. They will be automatically freed
+ // when the function returns.
+ SmallVector<IntrusiveRefCntPtr<MemoryObject>> Allocas;
+ // Values of arguments and executed instructions in this function.
+ DenseMap<Value *, AnyValue> ValueMap;
+
+ // Reserved for in-flight subroutines.
+ Function *ResolvedCallee = nullptr;
+ SmallVector<AnyValue> CalleeArgs;
+ AnyValue CalleeRetVal;
+
+ Frame(Function &F, CallBase *CallSite, Frame *LastFrame,
+ ArrayRef<AnyValue> Args, AnyValue &RetVal,
+ const TargetLibraryInfoImpl &TLIImpl)
+ : Func(F), LastFrame(LastFrame), CallSite(CallSite), Args(Args),
+ RetVal(RetVal), TLI(TLIImpl, &F) {
+ assert((Args.size() == F.arg_size() ||
+ (F.isVarArg() && Args.size() >= F.arg_size())) &&
+ "Expected enough arguments to call the function.");
+ BB = &Func.getEntryBlock();
+ PC = BB->begin();
+ for (Argument &Arg : F.args())
+ ValueMap[&Arg] = Args[Arg.getArgNo()];
+ }
+};
+
+/// Instruction executor using the visitor pattern.
+/// Unlike the Context class that manages the global state,
+/// InstExecutor only maintains the state for call frames.
+class InstExecutor : public InstVisitor<InstExecutor, void> {
+ Context &Ctx;
+ const DataLayout &DL;
+ EventHandler &Handler;
+ std::list<Frame> CallStack;
+ // Used to indicate whether the interpreter should continue execution.
+ bool Status;
+ std::optional<ProgramExitInfo> ExitInfo;
+ Frame *CurrentFrame = nullptr;
+ AnyValue None;
+
+ void reportImmediateUB(StringRef Msg);
+ void reportError(StringRef Msg);
+
+ /// Check if the upcoming memory access is valid. Returns the offset relative
+ /// to the underlying object if it is valid.
+ std::optional<uint64_t> verifyMemAccess(const MemoryObject &MO,
+ const APInt &Address,
+ uint64_t AccessSize, Align Alignment,
+ bool IsStore);
+
+ const AnyValue &getValue(Value *V);
+ void setResult(Instruction &I, AnyValue V);
+
+ AnyValue computeUnOp(Type *Ty, const AnyValue &Operand,
+ function_ref<AnyValue(const AnyValue &)> ScalarFn);
+ void visitUnOp(Instruction &I,
+ function_ref<AnyValue(const AnyValue &)> ScalarFn);
+
+ void visitIntUnOp(Instruction &I,
+ function_ref<AnyValue(const APInt &)> ScalarFn);
+
+ AnyValue computeBinOp(
+ Type *Ty, const AnyValue &LHS, const AnyValue &RHS,
+ function_ref<AnyValue(const AnyValue &, const AnyValue &)> ScalarFn);
+ void visitBinOp(
+ Instruction &I,
+ function_ref<AnyValue(const AnyValue &, const AnyValue &)> ScalarFn);
+
+ void
+ visitIntBinOp(Instruction &I,
+ function_ref<AnyValue(const APInt &, const APInt &)> ScalarFn);
+
+ void jumpTo(Instruction &Terminator, BasicBlock *DestBB);
+
+ /// Helper function to determine whether an inline asm is a no-op, which is
+ /// used to implement black_box style optimization blockers.
+ bool isNoopInlineAsm(Value *V, Type *RetTy);
+
+ AnyValue load(const AnyValue &Ptr, Align Alignment, Type *ValTy);
+ void store(const AnyValue &Ptr, Align Alignment, const AnyValue &Val,
+ Type *ValTy);
+
+ AnyValue computePtrAdd(const Pointer &Ptr, const APInt &Offset,
+ GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset);
+ AnyValue computePtrAdd(const AnyValue &Ptr, const APInt &Offset,
+ GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset);
+ AnyValue computeScaledPtrAdd(const AnyValue &Ptr, const AnyValue &Index,
+ const APInt &Scale, GEPNoWrapFlags Flags,
+ AnyValue &AccumulatedOffset);
+
+ AnyValue canonicalizeIndex(const AnyValue &Idx, unsigned IndexBitWidth,
+ GEPNoWrapFlags Flags);
+
+ friend class LibraryEnvironment;
+
+public:
+ InstExecutor(Context &C, EventHandler &H, Function &F,
+ ArrayRef<AnyValue> Args, AnyValue &RetVal)
+ : Ctx(C), DL(Ctx.getDataLayout()), Handler(H), Status(true) {
+ CallStack.emplace_back(F, /*CallSite=*/nullptr, /*LastFrame=*/nullptr, Args,
+ RetVal, Ctx.getTLIImpl());
+ }
+
+ void visitReturnInst(ReturnInst &RI);
+ void visitBranchInst(BranchInst &BI);
+ void visitSwitchInst(SwitchInst &SI);
+ void visitUnreachableInst(UnreachableInst &);
+ void visitCallBrInst(CallBrInst &CI);
+ void visitIndirectBrInst(IndirectBrInst &IBI);
+
+ void returnFromCallee();
+
+ AnyValue callIntrinsic(CallBase &CB);
+ AnyValue callLibFunc(CallBase &CB, Function *ResolvedCallee);
+
+ void requestProgramExit(ProgramExitKind Kind, uint64_t ExitCode = 0);
+ bool hasProgramExit() const { return ExitInfo.has_value(); }
+ std::optional<ProgramExitInfo> getProgramExitInfo() const { return ExitInfo; }
+
+ void enterCall(CallBase &CB);
+ void visitCallInst(CallInst &CI);
+ void visitInvokeInst(InvokeInst &II);
+ void visitAdd(BinaryOperator &I);
+ void visitSub(BinaryOperator &I);
+ void visitMul(BinaryOperator &I);
+ void visitSDiv(BinaryOperator &I);
+ void visitSRem(BinaryOperator &I);
+ void visitUDiv(BinaryOperator &I);
+ void visitURem(BinaryOperator &I);
+ void visitTruncInst(TruncInst &Trunc);
+ void visitZExtInst(ZExtInst &ZExt);
+ void visitSExtInst(SExtInst &SExt);
+ void visitAnd(BinaryOperator &I);
+ void visitXor(BinaryOperator &I);
+ void visitOr(BinaryOperator &I);
+ void visitShl(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitSelect(SelectInst &SI);
+ void visitAllocaInst(AllocaInst &AI);
+ void visitGetElementPtrInst(GetElementPtrInst &GEP);
+ void visitIntToPtr(IntToPtrInst &I);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitInstruction(Instruction &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+ void visitInsertElementInst(InsertElementInst &IEI);
+ void visitExtractElementInst(ExtractElementInst &EEI);
+ void visitShuffleVectorInst(ShuffleVectorInst &SVI);
+
+ /// This function implements the main interpreter loop.
+ /// It handles function calls in a non-recursive manner to avoid stack
+ /// overflows.
+ ExecutionStatus runMainLoop();
+};
+} // namespace llvm::ubi
+
+#endif
diff --git a/llvm/tools/llubi/lib/Library.cpp b/llvm/tools/llubi/lib/Library.cpp
new file mode 100644
index 0000000000000..0faca1940dcf6
--- /dev/null
+++ b/llvm/tools/llubi/lib/Library.cpp
@@ -0,0 +1,327 @@
+//===- Library.cpp - Library Function Simulator for llubi -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Library.h"
+#include <algorithm>
+#include <cstdarg>
+#include <cstdio>
+#include <cstring>
+
+namespace llvm::ubi {
+static uint64_t getMaxAlignT(const DataLayout &DL) {
+ return DL.getPointerABIAlignment(0).value() >= 8 ? 16 : 8;
+}
+
+std::optional<std::string>
+LibraryEnvironment::readStringFromMemory(const Pointer &Ptr) {
+ auto *MO = Ptr.getMemoryObject();
+ if (!MO) {
+ Executor.reportImmediateUB(
+ "Invalid memory access via a pointer with nullary "
+ "provenance.");
+ return std::nullopt;
+ }
+
+ std::string Result;
+ const uint64_t Address = Ptr.address().getZExtValue();
+ uint64_t Offset = 0;
+
+ while (true) {
+ auto ValidOffset = Executor.verifyMemAccess(
+ *MO, APInt(DL.getPointerSizeInBits(0), Address + Offset), 1, Align(1),
+ false);
+ if (!ValidOffset) {
+ return std::nullopt;
+ }
+
+ Byte B = (*MO)[*ValidOffset];
+ if (B.ConcreteMask != 0xFF) {
+ Executor.reportImmediateUB("Read uninitialized or poison memory while "
+ "parsing C-string.");
+ return std::nullopt;
+ }
+
+ if (B.Value == 0) {
+ break;
+ }
+
+ Result.push_back(static_cast<char>(B.Value));
+ ++Offset;
+ }
+
+ return Result;
+}
+std::optional<AnyValue> LibraryEnvironment::executeMalloc(CallBase &CB) {
+ const auto SizeVal = Executor.getValue(CB.getArgOperand(0));
+ if (SizeVal.isPoison()) {
+ Executor.reportImmediateUB("malloc called with a poison size.");
+ return std::nullopt;
+ }
+
+ const uint64_t AllocSize = SizeVal.asInteger().getZExtValue();
+ const uint64_t MaxAlign = getMaxAlignT(DL);
+
+ const auto Obj = Ctx.allocate(AllocSize, MaxAlign, CB.getName(), 0,
+ MemInitKind::Uninitialized);
+
+ if (!Obj)
+ return AnyValue::getNullValue(Ctx, CB.getType());
+
+ return Ctx.deriveFromMemoryObject(Obj);
+}
+std::optional<AnyValue> LibraryEnvironment::executeCalloc(CallBase &CB) {
+ const auto CountVal = Executor.getValue(CB.getArgOperand(0));
+ const auto SizeVal = Executor.getValue(CB.getArgOperand(1));
+
+ if (CountVal.isPoison()) {
+ Executor.reportImmediateUB("calloc called with a poison count.");
+ return std::nullopt;
+ }
+ if (SizeVal.isPoison()) {
+ Executor.reportImmediateUB("calloc called with a poison size.");
+ return std::nullopt;
+ }
+
+ const uint64_t Count = CountVal.asInteger().getZExtValue();
+ const uint64_t Size = SizeVal.asInteger().getZExtValue();
+
+ bool Overflow;
+ const uint64_t AllocSize = SaturatingMultiply(Count, Size, &Overflow);
+ if (Overflow) {
+ return AnyValue::getNullValue(Ctx, CB.getType());
+ }
+
+ const uint64_t MaxAlign = getMaxAlignT(DL);
+
+ auto Obj =
+ Ctx.allocate(AllocSize, MaxAlign, CB.getName(), 0, MemInitKind::Zeroed);
+
+ if (!Obj) {
+ return AnyValue::getNullValue(Ctx, CB.getType());
+ }
+
+ return Ctx.deriveFromMemoryObject(Obj);
+}
+std::optional<AnyValue> LibraryEnvironment::executeFree(CallBase &CB) {
+ const auto PtrVal = Executor.getValue(CB.getArgOperand(0));
+ if (PtrVal.isPoison()) {
+ Executor.reportImmediateUB("free called with a poison pointer.");
+ return std::nullopt;
+ }
+
+ auto &Ptr = PtrVal.asPointer();
+ if (Ptr.address().isZero()) {
+ // no-op when free is called with a null pointer.
+ return AnyValue();
+ }
+
+ if (!Ctx.free(Ptr.address().getZExtValue())) {
+ Executor.reportImmediateUB(
+ "freeing an invalid, unallocated, or already freed pointer.");
+ return std::nullopt;
+ }
+
+ return AnyValue();
+}
+std::optional<AnyValue> LibraryEnvironment::executePuts(CallBase &CB) {
+ const auto PtrVal = Executor.getValue(CB.getArgOperand(0));
+ if (PtrVal.isPoison()) {
+ Executor.reportImmediateUB("puts called with a poison pointer.");
+ return std::nullopt;
+ }
+
+ const auto StrOpt = readStringFromMemory(PtrVal.asPointer());
+ if (!StrOpt) {
+ return std::nullopt;
+ }
+
+ Handler.onPrint(*StrOpt + "\n");
+ return AnyValue(APInt(32, 1));
+}
+std::optional<AnyValue> LibraryEnvironment::executePrintf(CallBase &CB) {
+ auto FormatPtrVal = Executor.getValue(CB.getArgOperand(0));
+ if (FormatPtrVal.isPoison()) {
+ Executor.reportImmediateUB(
+ "printf called with a poison format string pointer.");
+ return std::nullopt;
+ }
+
+ auto FormatStrOpt = readStringFromMemory(FormatPtrVal.asPointer());
+ if (!FormatStrOpt) {
+ return std::nullopt;
+ }
+
+ std::string FormatStr = *FormatStrOpt;
+ std::string Output;
+ unsigned ArgIndex = 1; // Start from 1 since 0 is the format string.
+
+ for (size_t i = 0; i < FormatStr.size();) {
+ if (FormatStr[i] != '%') {
+ Output.push_back(FormatStr[i++]);
+ continue;
+ }
+
+ size_t Start = i++;
+ if (i < FormatStr.size() && FormatStr[i] == '%') {
+ Output.push_back('%');
+ ++i;
+ continue;
+ }
+
+ while (i < FormatStr.size() && strchr("-+ #.0123456789", FormatStr[i])) {
+ ++i;
+ }
+
+ while (i < FormatStr.size() && strchr("hljzt", FormatStr[i])) {
+ ++i;
+ }
+
+ if (i >= FormatStr.size()) {
+ Executor.reportImmediateUB(
+ "Invalid format string in printf: missing conversion "
+ "specifier.");
+ return std::nullopt;
+ }
+
+ char Specifier = FormatStr[i++];
+ std::string CleanChunk = FormatStr.substr(Start, i - Start - 1);
+ CleanChunk.erase(std::remove_if(CleanChunk.begin(), CleanChunk.end(),
+ [](char c) { return strchr("hljzt", c); }),
+ CleanChunk.end());
+
+ if (ArgIndex >= CB.arg_size()) {
+ Executor.reportImmediateUB(
+ "Not enough arguments provided for the format string.");
+ return std::nullopt;
+ }
+
+ const AnyValue &Arg = Executor.getValue(CB.getArgOperand(ArgIndex++));
+ if (Arg.isPoison()) {
+ Executor.reportImmediateUB("Poison argument passed to printf.");
+ return std::nullopt;
+ }
+
+ char Buf[1024];
+ switch (Specifier) {
+ case 'd':
+ case 'i': {
+ std::string HostFmt = CleanChunk + "ll" + Specifier;
+ snprintf(Buf, sizeof(Buf), HostFmt.c_str(),
+ static_cast<long long>(Arg.asInteger().getSExtValue()));
+ Output += Buf;
+ break;
+ }
+ case 'c':
+ // %c takes no length modifier and its vararg is a promoted int, so it
+ // must not share the "ll"-prefixed path used by u/o/x/X below ("%llc"
+ // is an invalid host format string).
+ snprintf(Buf, sizeof(Buf), (CleanChunk + "c").c_str(),
+ static_cast<int>(Arg.asInteger().getZExtValue()));
+ Output += Buf;
+ break;
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X': {
+ std::string HostFmt = CleanChunk + "ll" + Specifier;
+ snprintf(Buf, sizeof(Buf), HostFmt.c_str(),
+ static_cast<unsigned long long>(Arg.asInteger().getZExtValue()));
+ Output += Buf;
+ break;
+ }
+ case 'f':
+ case 'e':
+ case 'E':
+ case 'g':
+ case 'G': {
+ std::string HostFmt = CleanChunk + Specifier;
+ snprintf(Buf, sizeof(Buf), HostFmt.c_str(),
+ Arg.asFloat().convertToDouble());
+ Output += Buf;
+ break;
+ }
+ case 'p': {
+ std::string HostFmt = CleanChunk + "llx";
+ snprintf(Buf, sizeof(Buf), HostFmt.c_str(),
+ static_cast<unsigned long long>(
+ Arg.asPointer().address().getZExtValue()));
+ Output += "0x";
+ Output += Buf;
+ break;
+ }
+ case 's': {
+ auto StrOpt = readStringFromMemory(Arg.asPointer());
+ if (!StrOpt)
+ return std::nullopt;
+ std::string HostFmt = CleanChunk + "s";
+ snprintf(Buf, sizeof(Buf), HostFmt.c_str(), StrOpt->c_str());
+ Output += Buf;
+ break;
+ }
+ default:
+ Executor.reportImmediateUB("Unknown format specifier in printf.");
+ return std::nullopt;
+ }
+ }
+
+ Handler.onPrint(Output);
+ return AnyValue(APInt(32, Output.size()));
+}
+std::optional<AnyValue> LibraryEnvironment::executeExit(CallBase &CB) {
+ const auto CodeVal = Executor.getValue(CB.getArgOperand(0));
+ if (CodeVal.isPoison()) {
+ Executor.reportImmediateUB("exit called with a poison code.");
+ return std::nullopt;
+ }
+ Executor.requestProgramExit(ProgramExitKind::Exit,
+ CodeVal.asInteger().getZExtValue());
+ return std::nullopt;
+}
+std::optional<AnyValue> LibraryEnvironment::executeAbort(CallBase &) {
+ Executor.requestProgramExit(ProgramExitKind::Abort);
+ return std::nullopt;
+}
+std::optional<AnyValue> LibraryEnvironment::executeTerminate(CallBase &) {
+ Executor.requestProgramExit(ProgramExitKind::Terminate);
+ return std::nullopt;
+}
+std::optional<AnyValue> LibraryEnvironment::call(LibFunc LF, CallBase &CB) {
+ switch (LF) {
+ case LibFunc_malloc:
+ case LibFunc_Znwm:
+ case LibFunc_Znam: {
+ return executeMalloc(CB);
+ }
+
+ case LibFunc_calloc: {
+ return executeCalloc(CB);
+ }
+
+ case LibFunc_free:
+ case LibFunc_ZdaPv:
+ case LibFunc_ZdlPv: {
+ return executeFree(CB);
+ }
+
+ case LibFunc_puts: {
+ return executePuts(CB);
+ }
+
+ case LibFunc_printf: {
+ return executePrintf(CB);
+ }
+
+ case LibFunc_exit: {
+ return executeExit(CB);
+ }
+
+ case LibFunc_abort: {
+ return executeAbort(CB);
+ }
+
+ case LibFunc_terminate: {
+ return executeTerminate(CB);
+ }
+
+ default: {
+ return std::nullopt;
+ }
+ }
+}
+} // namespace llvm::ubi
+
diff --git a/llvm/tools/llubi/lib/Library.h b/llvm/tools/llubi/lib/Library.h
new file mode 100644
index 0000000000000..1411db9169ec3
--- /dev/null
+++ b/llvm/tools/llubi/lib/Library.h
@@ -0,0 +1,51 @@
+//===--- Library.h - Library Function Simulator for llubi -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLUBI_LIBRARY_H
+#define LLVM_TOOLS_LLUBI_LIBRARY_H
+
+#include "Context.h"
+#include "Interpreter.h"
+#include "Value.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/InstrTypes.h"
+#include <string>
+
+namespace llvm::ubi {
+
+/// Simulates a small set of common C/C++ library functions (allocation, I/O,
+/// and process termination) on behalf of the llubi interpreter.
+class LibraryEnvironment {
+  Context &Ctx;           // Interpreter context. NOTE(review): presumably owns
+                          // the simulated memory state — confirm.
+  EventHandler &Handler;  // Receives simulated program output (onPrint).
+  const DataLayout &DL;   // NOTE(review): likely used for allocation sizing.
+  InstExecutor &Executor; // Evaluates operands, reports UB, requests exit.
+
+  /// Reads a NUL-terminated string from interpreted memory at \p Ptr.
+  /// Presumably returns std::nullopt when the read fails (invalid or
+  /// uninitialized memory) — verify against the implementation.
+  std::optional<std::string> readStringFromMemory(const Pointer &Ptr);
+
+  // One simulator per supported library function. Each returns the call's
+  // result value, or std::nullopt when execution must halt.
+  std::optional<AnyValue> executeMalloc(CallBase &CB);
+  std::optional<AnyValue> executeCalloc(CallBase &CB);
+  std::optional<AnyValue> executeFree(CallBase &CB);
+  std::optional<AnyValue> executePuts(CallBase &CB);
+  std::optional<AnyValue> executePrintf(CallBase &CB);
+  std::optional<AnyValue> executeExit(CallBase &CB);
+  std::optional<AnyValue> executeAbort(CallBase &CB);
+  std::optional<AnyValue> executeTerminate(CallBase &CB);
+
+public:
+  LibraryEnvironment(Context &C, EventHandler &H, const DataLayout &DL,
+                     InstExecutor &Executor)
+      : Ctx(C), Handler(H), DL(DL), Executor(Executor) {}
+
+  /// Simulates a standard library call. Returns std::nullopt to indicate that
+  /// execution should halt (either due to an exit/abort call or an immediate
+  /// UB trigger). NOTE(review): the implementation also returns std::nullopt
+  /// for LibFuncs it does not model, so callers cannot distinguish "halted"
+  /// from "unrecognized" — confirm callers pre-filter the LibFunc.
+  std::optional<AnyValue> call(LibFunc LF, CallBase &CB);
+};
+
+} // namespace llvm::ubi
+
+#endif // LLVM_TOOLS_LLUBI_LIBRARY_H
diff --git a/llvm/tools/llubi/llubi.cpp b/llvm/tools/llubi/llubi.cpp
index de76a7e64c27b..50cfbc9cfcb37 100644
--- a/llvm/tools/llubi/llubi.cpp
+++ b/llvm/tools/llubi/llubi.cpp
@@ -131,6 +131,22 @@ class VerboseEventHandler : public ubi::EventHandler {
return true;
}
+  /// Reports how the interpreted program ended. Returning true lets the
+  /// driver proceed with its normal exit-code handling; returning false
+  /// makes the driver fail with status 1.
+  bool onProgramExit(ubi::ProgramExitKind Kind, uint64_t ExitCode) override {
+    switch (Kind) {
+    // A normal return from the entry function and an explicit exit() call
+    // are reported identically, including the exit code.
+    case ubi::ProgramExitKind::Returned:
+    case ubi::ProgramExitKind::Exit:
+      errs() << "Program exited with code " << ExitCode << '\n';
+      return true;
+    case ubi::ProgramExitKind::Abort:
+      errs() << "Program aborted.\n";
+      return true;
+    case ubi::ProgramExitKind::Terminate:
+      errs() << "Program terminated.\n";
+      return true;
+    }
+    // Covering switch above; reaching here means a new enumerator was added.
+    llvm_unreachable("Unknown ProgramExitKind");
+  }
+
void onUnrecognizedInstruction(Instruction &I) override {
errs() << "Unrecognized instruction: " << I << '\n';
}
@@ -239,14 +255,33 @@ int main(int argc, char **argv) {
ubi::EventHandler NoopHandler;
VerboseEventHandler VerboseHandler;
+ ubi::EventHandler &Handler =
+ Verbose ? static_cast<ubi::EventHandler &>(VerboseHandler)
+ : static_cast<ubi::EventHandler &>(NoopHandler);
ubi::AnyValue RetVal;
- if (!Ctx.runFunction(*EntryFn, Args, RetVal,
- Verbose ? VerboseHandler : NoopHandler)) {
+ std::optional<ubi::ProgramExitInfo> ProgramExit;
+ const auto ExecStatus =
+ Ctx.runFunction(*EntryFn, Args, RetVal, Handler, &ProgramExit);
+ if (ExecStatus == ubi::ExecutionStatus::Failed) {
WithColor::error() << "Execution of function '" << EntryFunc
<< "' failed.\n";
return 1;
}
+ if (ExecStatus == ubi::ExecutionStatus::ProgramExited) {
+ assert(ProgramExit && "Expected program exit information");
+ switch (ProgramExit->Kind) {
+ case ubi::ProgramExitKind::Exit:
+ return static_cast<int>(ProgramExit->ExitCode & 0xFF);
+ case ubi::ProgramExitKind::Abort:
+ case ubi::ProgramExitKind::Terminate:
+ return 1;
+ case ubi::ProgramExitKind::Returned:
+ llvm_unreachable("Unexpected returned kind for ProgramExited status");
+ }
+ }
+
+ uint64_t ExitCode = 0;
// If the function returns an integer, return that as the exit code.
if (EntryFn->getReturnType()->isIntegerTy()) {
assert(!RetVal.isNone() && "Expected a return value from entry function");
@@ -256,8 +291,12 @@ int main(int argc, char **argv) {
return 1;
}
APInt Result = RetVal.asInteger();
- return (int)Result.extractBitsAsZExtValue(
- std::min(Result.getBitWidth(), 8U), 0);
+ ExitCode =
+ Result.extractBitsAsZExtValue(std::min(Result.getBitWidth(), 8U), 0);
}
- return 0;
+
+ if (!Handler.onProgramExit(ubi::ProgramExitKind::Returned, ExitCode))
+ return 1;
+
+ return static_cast<int>(ExitCode);
}
More information about the llvm-commits
mailing list